You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

btrie.c 79KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955
  1. /* Level-Compressed Tree Bitmap (LC-TBM) Trie implementation
  2. *
  3. * Contributed by Geoffrey T. Dairiki <dairiki@dairiki.org>
  4. *
  5. * This file is released under a "Three-clause BSD License".
  6. *
  7. * Copyright (c) 2013, Geoffrey T. Dairiki
  8. * All rights reserved.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above
  18. * copyright notice, this list of conditions and the following
  19. * disclaimer in the documentation and/or other materials provided
  20. * with the distribution.
  21. *
  22. * * Neither the name of Geoffrey T. Dairiki nor the names of other
  23. * contributors may be used to endorse or promote products derived
  24. * from this software without specific prior written permission.
  25. *
  26. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  27. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  28. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  29. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GEOFFREY
  30. * T. DAIRIKI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  31. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  32. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  33. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  34. * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  35. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  36. * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  37. * DAMAGE.
  38. */
  39. /*****************************************************************
  40. *
  41. * This code implements a routing table conceptually based on a binary
  42. * trie structure. Internally, the trie is represented by two types
  43. * of compound nodes: "multibit nodes", which contain the top few
  44. * levels of an entire binary subtree; and "level compression" (LC)
  45. * nodes which represent a (potentially long) chain of out-degree one
  46. * (single child) binary nodes (possibly ending at a terminal node).
  47. *
  48. * The multibit nodes are represented using a "Tree Bitmap" structure
  49. * (more on this below), which is very efficient --- both in terms of
  50. * memory usage and lookup speed --- at representing densely branching
  51. * parts of the trie. The LC nodes can efficiently represent long
  52. * non-branching chains of binary trie nodes. Using both node types
  53. * together results in efficient representation of both the sparse and
  54. * dense parts of a binary trie.
  55. *
  56. * Graphically, here's the rough idea:
  57. *
  58. * ........
  59. * .LC o .
  60. * . / . LC nodes can
  61. * . o . <= represent long chains
  62. * . \ . of (non-branching) binary
  63. * . o . trie nodes
  64. * . / .
  65. * . o .
  66. * ......../.....
  67. * .TBM o .
  68. * . / \ . TBM nodes can represent
  69. * . o * . <= several levels of densely
  70. * . / \ . branching binary trie nodes
  71. * . o o .
  72. * ......./.....\.......
  73. * .TBM o .. o LC.
  74. * . / \ .. \ .
  75. * . o o .. o .
  76. * . / / \ .. \ .
  77. * . * o *.. o .
  78. * ...../....... / .
  79. * . o LC. . o .
  80. * . \ . .....\......
  81. * . * . . o TBM.
  82. * ........ . / \ .
  83. * . o o .
  84. * . / \ \ .
  85. * .* * *.
  86. * ...........
  87. *
  88. * Terminology
  89. * -----------
  90. *
  91. * node
  92. * Usually, in the comments below, "node" will be used to refer to
  93. * a compound node: either a multibit (TBM) node or an LC node.
  94. *
  95. * "internal node" or "prefix"
  96. * The terms "prefix" or "internal node" are used to refer to
  97. * a node in the binary trie which is internal to a multibit (TBM)
  98. * node.
  99. *
  100. * ----------------------------------------------------------------
  101. *
  102. * Internal Representation of the Nodes
  103. * ====================================
  104. *
  105. * Multibit (TBM) Nodes
  106. * ~~~~~~~~~~~~~~~~~~~~
  107. *
  108. * The multibit nodes are represented using a "Tree Bitmap" (TBM)
  109. * structure as described by Eatherton, Dittia and Varghese[1]. See
  110. * the paper referenced below for basic details.
  111. *
  112. * A multibit node, represents several levels of a binary trie.
  113. * For example, here is a multibit node of stride 2 (which represent
  114. * two levels of a binary trie.
  115. *
  116. * +------- | ------+
  117. * | multi o |
  118. * | bit / \ |
  119. * | node / \ |
  120. * | o * |
  121. * +--- / \ - / \ --+
  122. * O
  123. *
  124. * Note that, for a multibit node of stride S, there are 2^S - 1 internal
  125. * nodes, each of which may have data (or not) associated with them, and
  126. * 2^S "external paths" leading to other (possibly compound nodes).
  127. * (In the diagram above, one of three internal node (the one denoted by "*")
  128. * has data, and one of four extending paths leads to an external node
  129. * (denoted by the 'O').)
  130. *
  131. * The TBM structure can represent these bitmaps in a very memory-efficient
  132. * manner.
  133. *
  134. * Each TBM node consists of two bitmaps --- the "internal bitmap" and the
  135. * "extending paths bitmap" --- and a pointer which points to an array
  136. * which contains both the extending path ("child") nodes and any
  137. * internal prefix data for the TBM node.
  138. *
  139. * +--------+--------+
  140. * TBM | ext bm | int bm |
  141. * Node +--------+--------+
  142. * | pointer |----+
  143. * +-----------------+ |
  144. * |
  145. * |
  146. * +-----------------+ |
  147. * | extending path | |
  148. * | node[N-1] | |
  149. * +-----------------+ |
  150. * / ... / |
  151. * / ... / |
  152. * +-----------------+ |
  153. * | extending path | |
  154. * | node[0] | |
  155. * +-----------------+<---+
  156. * | int. data[M-1] |
  157. * +-----------------+
  158. * / ... /
  159. * +-----------------+
  160. * | int. data[0] |
  161. * +-----------------+
  162. *
  163. * The extending paths bitmap (or "ext bitmap") has one bit for each
  164. * possible "extending path" from the bottom of the multibit node. To
  165. * check if a particular extending path is present, one checks to see if
  166. * the corresponding bit is set in the ext bitmap. The index into the
  167. * array of children for that path can be found by counting the number
  168. * of set bits to the left of that bit.
  169. *
  170. * Similarly, the internal bitmap has one bit for each binary node
  171. * which is internal to the multibit node. To determine whether there
  172. * is data stored for an internal prefix, one checks the corresponding
  173. * bit in the internal bitmap. As for extending paths, the index into
  174. * the array of internal data is found by counting the number of set
  175. * bits to the left of that bit.
  176. *
  177. * To save space in the node structure, the node data array is stored
  178. * contiguously with the node extending path array. The single
  179. * ("children") pointer in the TBM structure points to the beginning
  180. * of the array of extending path nodes and to (one past) the end of
  181. * the the internal data array.
  182. *
  183. * The multibit stride is chosen so that the entire TBM node structure fits
  184. * in the space of two pointers. On 32 bit machines this means the stride
  185. * is four (each of the two bitmaps is 16 bits); on 32 bit machines the
  186. * stride is five.
  187. *
  188. * Note that there are only 2^stride - 1 internal prefixes in a TBM
  189. * node. That means there is one unused bit in the internal bitmap.
  190. * We require that that bit must always be clear for a TBM node. (If
  191. * set, it indicates that the structure represents, instead, an LC
  192. * node. See below.)
  193. *
  194. * ----------------------------------------------------------------
  195. *
  196. * Level Compression (LC) Nodes
  197. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  198. *
  199. * LC nodes are used to represent a chain of out-degree-one (single
  200. * child) prefixes in the binary trie. The are represented by a bit
  201. * string (the "relative prefix") along with its length and a pointer
  202. * to the extending path (the next node past the LC node.)
  203. *
  204. *
  205. * Non-Terminal LC Node:
  206. *
  207. * +------------------+-------+
  208. * | relative prefix |1|0|len|
  209. * +------------------+-------+
  210. * | ptr.child |--+
  211. * +--------------------------+ |
  212. * |
  213. * |
  214. * +--------------------------+ |
  215. * | Next node - | |
  216. * | either LC or TBM | |
  217. * | | |
  218. * +--------------------------+<-+
  219. *
  220. * The Relative Prefix
  221. * -------------------
  222. *
  223. * The maximum relative prefix per LC node is selected so that (again)
  224. * the entire node structure fits in the space of two pointers. On 32 bit
  225. * machines, the maximum relative prefix is 24 bits; on 62 bit machines
  226. * the limit is 56 bits.
  227. *
  228. * In the LC node structure, the relative prefix is stored as an array
  229. * of bytes. To avoid some bit-shifting during tree searches, these
  230. * bytes are byte-aligned with the global prefix. In other words, in
  231. * general there are (pos % 8) "pad" bits at the beginning of the
  232. * relative prefix --- where pos "starting bit" (or depth in the
  233. * binary tree) of the LC node --- which really belong to the parent
  234. * node(s) of the LC node. For efficiency (so that we don't have to
  235. * mask them out when matching) we require that these pad bits be
  236. * correct --- they must match the path which leads to the LC node.
  237. *
  238. * The relative prefix length stored in the LC node structure does not
  239. * count the pad bits.
  240. *
  241. * Terminal Node Compression
  242. * -------------------------
  243. *
  244. * For memory efficiency, we also support "terminal LC" nodes. When
  245. * the extension path from an LC node consists a single terminal node,
  246. * we store that terminal nodes data directly in the parent LC node.
  247. *
  248. * Instead of this:
  249. *
  250. * +------------------+-------+
  251. * | relative prefix |1|0|len|
  252. * +------------------+-------+
  253. * | ptr.child |--+
  254. * +--------------------------+ |
  255. * |
  256. * +--------------------------+ |
  257. * | Terminal Node (TBM node, | |
  258. * | empty except for the | |
  259. * +--| root internal node.) | |
  260. * | +--------------------------+<-+
  261. * |
  262. * +->+--------------------------+
  263. * | terminal node data |
  264. * +--------------------------+
  265. *
  266. * We can do this:
  267. *
  268. * +------------------+-------+
  269. * | relative prefix |1|1|len|
  270. * +------------------+-------+
  271. * | terminal node data |
  272. * +--------------------------+
  273. *
  274. * Terminal LC nodes are differentiated from non-terminal LC nodes
  275. * by the setting of the is_terminal flag.
  276. *
  277. * Node Structure Packing Details
  278. * ------------------------------
  279. *
  280. * The LC and TBM node structures are carefully packed so that the
  281. * "is_lc" flag (which indicates that a node is an LC node)
  282. * corresponds to the one unused bit in the internal bitmap of the TBM
  283. * node structure (which we require to be zero for TBM nodes).
  284. *
  285. * ----------------------------------------------------------------
  286. *
  287. * References
  288. * ==========
  289. *
  290. * [1] Will Eatherton, George Varghese, and Zubin Dittia. 2004. Tree
  291. * bitmap: hardware/software IP lookups with incremental
  292. * updates. SIGCOMM Comput. Commun. Rev. 34, 2 (April 2004),
  293. * 97-122. DOI=10.1145/997150.997160
  294. * http://doi.acm.org/10.1145/997150.997160
  295. * http://comnet.kaist.ac.kr/yhlee/CN_2008_Spring/readings/Eath-04-tree_bitmap.pdf
  296. *
  297. ****************************************************************/
  298. #include <stdio.h>
  299. #include <stdlib.h>
  300. #include <string.h>
  301. #include <setjmp.h>
  302. #if defined(TEST) && defined(NDEBUG)
  303. #warning undefining NDEBUG for TEST build
  304. #undef NDEBUG
  305. #endif
  306. #include <assert.h>
  307. #include "btrie.h"
  308. #include "libutil/mem_pool.h"
  309. #ifdef __SIZEOF_POINTER__
  310. #define SIZEOF_VOID_P __SIZEOF_POINTER__
  311. #else
  312. #if defined(__ILP32__) || defined(__ILP32) || defined(_ILP32)
  313. #define SIZEOF_VOID_P 4
  314. #elif defined(__ILP64__) || defined(__ILP64) || defined(_ILP64)
  315. #define SIZEOF_VOID_P 8
  316. #elif defined(__LLP64__) || defined(__LLP64) || defined(_LLP64) || defined(_WIN64)
  317. #define SIZEOF_VOID_P 8
  318. #elif defined(__LP64__) || defined(__LP64) || defined(_LP64)
  319. #define SIZEOF_VOID_P 8
  320. #elif defined(UINTPTR_MAX) && defined(UINT64_MAX) && (UINTPTR_MAX == UINT64_MAX)
  321. #define SIZEOF_VOID_P 8
  322. #else
  323. #define SIZEOF_VOID_P 4
  324. #endif
  325. #endif
  326. #if SIZEOF_VOID_P == 4
  327. #define TBM_STRIDE 4
  328. #elif SIZEOF_VOID_P == 8
  329. #define TBM_STRIDE 5
  330. #else
  331. #error "Unsupported word size"
  332. #endif
  333. #ifndef NO_STDINT_H
  334. #if TBM_STRIDE == 4
  335. typedef uint16_t tbm_bitmap_t;
  336. #else
  337. typedef uint32_t tbm_bitmap_t;
  338. #endif
  339. #else /* NO_STDINT_H */
  340. #if TBM_STRIDE == 4
  341. #if SIZEOF_SHORT == 2
  342. typedef short unsigned tbm_bitmap_t;
  343. #else
  344. #error "can not determine type for 16 bit unsigned int"
  345. #endif
  346. #else /* TBM_STRIDE == 5 */
  347. #if SIZEOF_INT == 4
  348. typedef unsigned tbm_bitmap_t;
  349. #elif SIZEOF_LONG == 4
  350. typedef long unsigned tbm_bitmap_t;
  351. #else
  352. #error "can not determine type for 32 bit unsigned int"
  353. #endif
  354. #endif
  355. #endif
  356. #define TBM_FANOUT (1U << TBM_STRIDE)
  357. #define LC_BYTES_PER_NODE (SIZEOF_VOID_P - 1)
  358. typedef union node_u node_t;
  359. /* The tbm_node and lc_node structs must be packed so that the the
  360. * high bit (LC_FLAGS_IS_LC) of lc_flags in the the lc_node struct
  361. * coincides with bit zero (the most significant bit) of tbm_node's
  362. * int_bm. (This bit is how we differentiate between the two node
  363. * types. It is always clear for a tbm_node and always set for an
  364. * lc_node.)
  365. */
  366. struct tbm_node {
  367. #ifdef WORDS_BIGENDIAN
  368. tbm_bitmap_t int_bm; /* the internal bitmap */
  369. tbm_bitmap_t ext_bm; /* extending path ("external") bitmap */
  370. #else
  371. tbm_bitmap_t ext_bm; /* extending path ("external") bitmap */
  372. tbm_bitmap_t int_bm; /* the internal bitmap */
  373. #endif
  374. union {
  375. node_t *children; /* pointer to array of children */
  376. const void **data_end; /* one past end of internal prefix data array */
  377. } ptr;
  378. };
  379. struct lc_node {
  380. /* lc_flags contains the LC prefix length and a couple of bit flags
  381. * (apparently char-sized bit fields are a gcc extension)
  382. */
  383. #define LC_FLAGS_IS_LC 0x80
  384. #define LC_FLAGS_IS_TERMINAL 0x40
  385. #define LC_FLAGS_LEN_MASK 0x3f
  386. #ifdef WORDS_BIGENDIAN
  387. btrie_oct_t lc_flags;
  388. btrie_oct_t prefix[LC_BYTES_PER_NODE];
  389. #else
  390. btrie_oct_t prefix[LC_BYTES_PER_NODE];
  391. btrie_oct_t lc_flags;
  392. #endif
  393. union {
  394. node_t *child; /* pointer to child (if !is_terminal) */
  395. const void *data; /* the prefix data (if is_terminal) */
  396. } ptr;
  397. };
  398. union node_u {
  399. struct tbm_node tbm_node;
  400. struct lc_node lc_node;
  401. };
  402. struct free_hunk {
  403. struct free_hunk *next;
  404. };
  405. #define MAX_CHILD_ARRAY_LEN (TBM_FANOUT + TBM_FANOUT / 2)
  406. struct btrie {
  407. node_t root;
  408. rspamd_mempool_t *mp;
  409. struct free_hunk *free_list[MAX_CHILD_ARRAY_LEN];
  410. jmp_buf exception;
  411. /* mem mgmt stats */
  412. size_t alloc_total; /* total bytes allocated from mempool */
  413. size_t alloc_data; /* bytes allocated for TBM node int. prefix data */
  414. size_t alloc_waste; /* bytes wasted by rounding of data array size */
  415. #ifdef BTRIE_DEBUG_ALLOC
  416. size_t alloc_hist[MAX_CHILD_ARRAY_LEN * 2]; /* histogram of alloc sizes */
  417. #endif
  418. /* trie stats */
  419. size_t n_entries; /* number of entries */
  420. size_t n_tbm_nodes; /* total number of TBM nodes in tree */
  421. size_t n_lc_nodes; /* total number of LC nodes in tree */
  422. };
  423. /****************************************************************
  424. *
  425. * Memory management
  426. *
  427. * We will need to frequently resize child/data arrays. The current
  428. * mempool implementation does not support resizing/freeing, so here
  429. * we roll our own.
  430. */
  431. static inline void _free_hunk(struct btrie *btrie, void *buf, unsigned n_nodes)
  432. {
  433. struct free_hunk *hunk = buf;
  434. hunk->next = btrie->free_list[n_nodes - 1];
  435. btrie->free_list[n_nodes - 1] = hunk;
  436. }
  437. static inline void *
  438. _get_hunk(struct btrie *btrie, unsigned n_nodes)
  439. {
  440. struct free_hunk *hunk = btrie->free_list[n_nodes - 1];
  441. if (hunk != NULL)
  442. btrie->free_list[n_nodes - 1] = hunk->next;
  443. return hunk;
  444. }
  445. /* Get pointer to uninitialized child/data array.
  446. *
  447. * Allocates memory for an array of NDATA (void *)s followed by an
  448. * array of NCHILDREN (node_t)s. The returned pointer points to to
  449. * beginning of the children array (i.e. it points to (one past) the
  450. * end of the data array.)
  451. */
  452. static node_t *
  453. alloc_nodes(struct btrie *btrie, unsigned nchildren, unsigned ndata)
  454. {
  455. size_t n_nodes = nchildren + (ndata + 1) / 2;
  456. node_t *hunk;
  457. assert(n_nodes > 0 && n_nodes <= MAX_CHILD_ARRAY_LEN);
  458. hunk = _get_hunk(btrie, n_nodes);
  459. if (hunk == NULL) {
  460. /* Do not have free hunk of exactly the requested size, look for a
  461. * larger hunk. (The funny order in which we scan the buckets is
  462. * heuristically selected in an attempt to minimize unnecessary
  463. * creation of small fragments)
  464. */
  465. size_t n, skip = n_nodes > 4 ? 4 : n_nodes;
  466. for (n = n_nodes + skip; n <= MAX_CHILD_ARRAY_LEN; n++) {
  467. if ((hunk = _get_hunk(btrie, n)) != NULL) {
  468. _free_hunk(btrie, hunk + n_nodes, n - n_nodes);
  469. goto DONE;
  470. }
  471. }
  472. for (n = n_nodes + 1; n < n_nodes + skip && n <= MAX_CHILD_ARRAY_LEN;
  473. n++) {
  474. if ((hunk = _get_hunk(btrie, n)) != NULL) {
  475. _free_hunk(btrie, hunk + n_nodes, n - n_nodes);
  476. goto DONE;
  477. }
  478. }
  479. /* failed to find free hunk, allocate a fresh one */
  480. hunk = rspamd_mempool_alloc0(btrie->mp, n_nodes * sizeof(node_t));
  481. if (hunk == NULL)
  482. longjmp(btrie->exception, BTRIE_ALLOC_FAILED);
  483. btrie->alloc_total += n_nodes * sizeof(node_t);
  484. }
  485. DONE:
  486. btrie->alloc_data += ndata * sizeof(void *);
  487. btrie->alloc_waste += (ndata % 2) * sizeof(void *);
  488. #ifdef BTRIE_DEBUG_ALLOC
  489. btrie->alloc_hist[2 * nchildren + ndata]++;
  490. #endif
  491. /* adjust pointer to allow room for data array before child array */
  492. return hunk + (ndata + 1) / 2;
  493. }
  494. /* Free memory allocated by alloc_nodes */
  495. static void free_nodes(struct btrie *btrie, node_t *buf, unsigned nchildren,
  496. unsigned ndata)
  497. {
  498. size_t n_nodes = nchildren + (ndata + 1) / 2;
  499. assert(n_nodes > 0 && n_nodes <= MAX_CHILD_ARRAY_LEN);
  500. _free_hunk(btrie, buf - (ndata + 1) / 2, n_nodes);
  501. btrie->alloc_data -= ndata * sizeof(void *);
  502. btrie->alloc_waste -= (ndata % 2) * sizeof(void *);
  503. #ifdef BTRIE_DEBUG_ALLOC
  504. btrie->alloc_hist[2 * nchildren + ndata]--;
  505. #endif
  506. }
  507. /* Debugging/development only: */
  508. #ifdef BTRIE_DEBUG_ALLOC
  509. static void
  510. dump_alloc_hist(const struct btrie *btrie)
  511. {
  512. unsigned bin;
  513. size_t total_alloc = 0;
  514. size_t total_free = 0;
  515. size_t total_bytes = 0;
  516. size_t total_waste = 0;
  517. size_t total_free_bytes = 0;
  518. puts("hunk alloc free alloc wasted free");
  519. puts("size hunks hunks bytes bytes bytes");
  520. puts("==== ====== ====== ======== ======== ========");
  521. for (bin = 1; bin < 2 * MAX_CHILD_ARRAY_LEN; bin++) {
  522. size_t n_alloc = btrie->alloc_hist[bin];
  523. size_t bytes = n_alloc * bin * sizeof(void *);
  524. size_t waste_bytes = (bin % 2) * n_alloc * sizeof(void *);
  525. size_t n_free = 0, free_bytes;
  526. if (bin % 2 == 0) {
  527. const struct free_hunk *hunk;
  528. for (hunk = btrie->free_list[bin / 2 - 1]; hunk; hunk = hunk->next)
  529. n_free++;
  530. }
  531. free_bytes = n_free * bin * sizeof(void *);
  532. printf("%3zu: %6zu %6zu %8zu %8zu %8zu\n", bin * sizeof(void *),
  533. n_alloc, n_free, bytes, waste_bytes, free_bytes);
  534. total_alloc += n_alloc;
  535. total_free += n_free;
  536. total_bytes += bytes;
  537. total_waste += waste_bytes;
  538. total_free_bytes += free_bytes;
  539. }
  540. puts("---- ------ ------ -------- -------- --------");
  541. printf("SUM: %6zu %6zu %8zu %8zu %8zu\n",
  542. total_alloc, total_free, total_bytes, total_waste, total_free_bytes);
  543. }
  544. #endif
  545. /****************************************************************
  546. *
  547. * Bit twiddling
  548. *
  549. */
  550. static inline tbm_bitmap_t bit(unsigned b)
  551. {
  552. return 1U << ((1 << TBM_STRIDE) - 1 - b);
  553. }
  554. /* count the number of set bits in bitmap
  555. *
  556. * algorithm from
  557. * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  558. */
  559. static inline unsigned count_bits(tbm_bitmap_t v)
  560. {
  561. /* Count set bits in parallel. */
  562. /* v = (v & 0x5555...) + ((v >> 1) & 0x5555...); */
  563. v -= (v >> 1) & (tbm_bitmap_t) ~0UL / 3;
  564. /* v = (v & 0x3333...) + ((v >> 2) & 0x3333...); */
  565. v = (v & (tbm_bitmap_t) ~0UL / 5) + ((v >> 2) & (tbm_bitmap_t) ~0UL / 5);
  566. /* v = (v & 0x0f0f...) + ((v >> 4) & 0x0f0f...); */
  567. v = (v + (v >> 4)) & (tbm_bitmap_t) ~0UL / 17;
  568. /* v = v % 255; */
  569. #if TBM_STRIDE == 4
  570. /* tbm_bitmap_t is uint16_t, avoid the multiply */
  571. return (v + (v >> 8)) & 0x0ff;
  572. #else
  573. return (v * (tbm_bitmap_t) (~0UL / 255)) >> ((sizeof(tbm_bitmap_t) - 1) * 8);
  574. #endif
  575. }
  576. static inline unsigned count_bits_before(tbm_bitmap_t bm, int b)
  577. {
  578. return b ? count_bits(bm >> ((1 << TBM_STRIDE) - b)) : 0;
  579. }
  580. static inline unsigned count_bits_from(tbm_bitmap_t bm, int b)
  581. {
  582. return count_bits(bm << b);
  583. }
  584. /* extracts a few bits from bitstring, returning them as an integer */
  585. static inline btrie_oct_t RSPAMD_NO_SANITIZE extract_bits(const btrie_oct_t *prefix, unsigned pos,
  586. unsigned nbits)
  587. {
  588. if (nbits == 0)
  589. return 0;
  590. else {
  591. unsigned v = (prefix[pos / 8] << 8) + prefix[pos / 8 + 1];
  592. return (v >> (16 - nbits - pos % 8)) & ((1U << nbits) - 1);
  593. }
  594. }
  595. static inline unsigned extract_bit(const btrie_oct_t *prefix, int pos)
  596. {
  597. return (prefix[pos / 8] >> (7 - pos % 8)) & 0x01;
  598. }
  599. /* get mask for high n bits of a byte */
  600. static inline btrie_oct_t high_bits(unsigned n)
  601. {
  602. return (btrie_oct_t) - (1U << (8 - n));
  603. }
  604. /* determine whether two prefixes are equal */
  605. static inline int prefixes_equal(const btrie_oct_t *pfx1,
  606. const btrie_oct_t *pfx2, unsigned len)
  607. {
  608. return (memcmp(pfx1, pfx2, len / 8) == 0 && (len % 8 == 0 ||
  609. ((pfx1[len / 8] ^ pfx2[len / 8]) & high_bits(len % 8)) == 0));
  610. }
  611. /* determine length of longest common subprefix */
  612. static inline unsigned common_prefix(const btrie_oct_t *pfx1,
  613. const btrie_oct_t *pfx2, unsigned len)
  614. {
  615. /* algorithm adapted from
  616. * http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup
  617. */
  618. static btrie_oct_t leading_zeros[] =
  619. {
  620. 8,
  621. 7,
  622. 6,
  623. 6,
  624. 5,
  625. 5,
  626. 5,
  627. 5,
  628. 4,
  629. 4,
  630. 4,
  631. 4,
  632. 4,
  633. 4,
  634. 4,
  635. 4,
  636. 3,
  637. 3,
  638. 3,
  639. 3,
  640. 3,
  641. 3,
  642. 3,
  643. 3,
  644. 3,
  645. 3,
  646. 3,
  647. 3,
  648. 3,
  649. 3,
  650. 3,
  651. 3,
  652. 2,
  653. 2,
  654. 2,
  655. 2,
  656. 2,
  657. 2,
  658. 2,
  659. 2,
  660. 2,
  661. 2,
  662. 2,
  663. 2,
  664. 2,
  665. 2,
  666. 2,
  667. 2,
  668. 2,
  669. 2,
  670. 2,
  671. 2,
  672. 2,
  673. 2,
  674. 2,
  675. 2,
  676. 2,
  677. 2,
  678. 2,
  679. 2,
  680. 2,
  681. 2,
  682. 2,
  683. 2,
  684. 1,
  685. 1,
  686. 1,
  687. 1,
  688. 1,
  689. 1,
  690. 1,
  691. 1,
  692. 1,
  693. 1,
  694. 1,
  695. 1,
  696. 1,
  697. 1,
  698. 1,
  699. 1,
  700. 1,
  701. 1,
  702. 1,
  703. 1,
  704. 1,
  705. 1,
  706. 1,
  707. 1,
  708. 1,
  709. 1,
  710. 1,
  711. 1,
  712. 1,
  713. 1,
  714. 1,
  715. 1,
  716. 1,
  717. 1,
  718. 1,
  719. 1,
  720. 1,
  721. 1,
  722. 1,
  723. 1,
  724. 1,
  725. 1,
  726. 1,
  727. 1,
  728. 1,
  729. 1,
  730. 1,
  731. 1,
  732. 1,
  733. 1,
  734. 1,
  735. 1,
  736. 1,
  737. 1,
  738. 1,
  739. 1,
  740. 1,
  741. 1,
  742. 1,
  743. 1,
  744. 1,
  745. 1,
  746. 1,
  747. 1,
  748. 0,
  749. 0,
  750. 0,
  751. 0,
  752. 0,
  753. 0,
  754. 0,
  755. 0,
  756. 0,
  757. 0,
  758. 0,
  759. 0,
  760. 0,
  761. 0,
  762. 0,
  763. 0,
  764. 0,
  765. 0,
  766. 0,
  767. 0,
  768. 0,
  769. 0,
  770. 0,
  771. 0,
  772. 0,
  773. 0,
  774. 0,
  775. 0,
  776. 0,
  777. 0,
  778. 0,
  779. 0,
  780. 0,
  781. 0,
  782. 0,
  783. 0,
  784. 0,
  785. 0,
  786. 0,
  787. 0,
  788. 0,
  789. 0,
  790. 0,
  791. 0,
  792. 0,
  793. 0,
  794. 0,
  795. 0,
  796. 0,
  797. 0,
  798. 0,
  799. 0,
  800. 0,
  801. 0,
  802. 0,
  803. 0,
  804. 0,
  805. 0,
  806. 0,
  807. 0,
  808. 0,
  809. 0,
  810. 0,
  811. 0,
  812. 0,
  813. 0,
  814. 0,
  815. 0,
  816. 0,
  817. 0,
  818. 0,
  819. 0,
  820. 0,
  821. 0,
  822. 0,
  823. 0,
  824. 0,
  825. 0,
  826. 0,
  827. 0,
  828. 0,
  829. 0,
  830. 0,
  831. 0,
  832. 0,
  833. 0,
  834. 0,
  835. 0,
  836. 0,
  837. 0,
  838. 0,
  839. 0,
  840. 0,
  841. 0,
  842. 0,
  843. 0,
  844. 0,
  845. 0,
  846. 0,
  847. 0,
  848. 0,
  849. 0,
  850. 0,
  851. 0,
  852. 0,
  853. 0,
  854. 0,
  855. 0,
  856. 0,
  857. 0,
  858. 0,
  859. 0,
  860. 0,
  861. 0,
  862. 0,
  863. 0,
  864. 0,
  865. 0,
  866. 0,
  867. 0,
  868. 0,
  869. 0,
  870. 0,
  871. 0,
  872. 0,
  873. 0,
  874. 0,
  875. 0,
  876. };
  877. unsigned nb;
  878. for (nb = 0; nb < len / 8; nb++) {
  879. unsigned diff = *pfx1++ ^ *pfx2++;
  880. if (diff != 0)
  881. return 8 * nb + leading_zeros[diff];
  882. }
  883. if (len % 8) {
  884. unsigned n = leading_zeros[*pfx1 ^ *pfx2];
  885. if (n < len % 8)
  886. return 8 * nb + n;
  887. }
  888. return len;
  889. }
  890. /****************************************************************
  891. */
  892. static inline int is_empty_node(const node_t *node)
  893. {
  894. return node->tbm_node.ext_bm == 0 && node->tbm_node.int_bm == 0;
  895. }
  896. static inline int is_lc_node(const node_t *node)
  897. {
  898. return (node->lc_node.lc_flags & LC_FLAGS_IS_LC) != 0;
  899. }
  900. static inline int is_tbm_node(const node_t *node)
  901. {
  902. return !is_lc_node(node);
  903. }
  904. /* is node a TBM node with internal data? */
  905. static inline int has_data(const node_t *node)
  906. {
  907. return is_tbm_node(node) && node->tbm_node.int_bm != 0;
  908. }
  909. static inline unsigned base_index(unsigned pfx, unsigned plen)
  910. {
  911. assert(plen < TBM_STRIDE);
  912. assert(pfx < (1U << plen));
  913. return pfx | (1U << plen);
  914. }
  915. /* initialize node to an empty TBM node */
  916. static inline void init_empty_node(struct btrie *btrie, node_t *node)
  917. {
  918. memset(node, 0, sizeof(*node));
  919. btrie->n_tbm_nodes++;
  920. }
  921. /* get pointer to TBM internal prefix data */
  922. static inline const void **
  923. tbm_data_p(const struct tbm_node *node, unsigned pfx, unsigned plen)
  924. {
  925. unsigned bi = base_index(pfx, plen);
  926. if ((node->int_bm & bit(bi)) == 0)
  927. return NULL; /* no data */
  928. else {
  929. return &node->ptr.data_end[-(int) count_bits_from(node->int_bm, bi)];
  930. }
  931. }
  932. /* add an element to the internal data array */
  933. static void tbm_insert_data(struct btrie *btrie, struct tbm_node *node,
  934. unsigned pfx, unsigned plen, const void *data)
  935. {
  936. /* XXX: don't realloc if already big enough? */
  937. unsigned bi = base_index(pfx, plen);
  938. unsigned nchildren = count_bits(node->ext_bm);
  939. int ndata = count_bits(node->int_bm);
  940. unsigned di = count_bits_before(node->int_bm, bi);
  941. node_t *old_children = node->ptr.children;
  942. const void **old_data_beg = node->ptr.data_end - ndata;
  943. const void **data_beg;
  944. assert((node->int_bm & bit(bi)) == 0);
  945. node->ptr.children = alloc_nodes(btrie, nchildren, ndata + 1);
  946. data_beg = node->ptr.data_end - (ndata + 1);
  947. data_beg[di] = data;
  948. node->int_bm |= bit(bi);
  949. if (nchildren != 0 || ndata != 0) {
  950. memcpy(data_beg, old_data_beg, di * sizeof(data_beg[0]));
  951. memcpy(&data_beg[di + 1], &old_data_beg[di],
  952. (ndata - di) * sizeof(data_beg[0]) + nchildren * sizeof(node_t));
  953. free_nodes(btrie, old_children, nchildren, ndata);
  954. }
  955. }
  956. /* determine whether TBM has internal prefix data for pfx/plen or ancestors */
  957. static inline int has_internal_data(const struct tbm_node *node, unsigned pfx,
  958. unsigned plen)
  959. {
  960. #define BIT(n) (1U << ((1 << TBM_STRIDE) - 1 - (n)))
  961. #define B0() BIT(1) /* the bit for 0/0 */
  962. #define B1(n) (BIT((n) + 2) | B0()) /* the bits for n/1 and its ancestors */
  963. #define B2(n) (BIT((n) + 4) | B1(n >> 1)) /* the bits for n/2 and ancestors */
  964. #define B3(n) (BIT((n) + 8) | B2(n >> 1)) /* the bits for n/3 and ancestors */
  965. #define B4(n) (BIT((n) + 16) | B3(n >> 1)) /* the bits for n/4 and ancestors */
  966. static tbm_bitmap_t ancestors[] =
  967. { 0,
  968. B0(),
  969. B1(0),
  970. B1(1),
  971. B2(0),
  972. B2(1),
  973. B2(2),
  974. B2(3),
  975. B3(0),
  976. B3(1),
  977. B3(2),
  978. B3(3),
  979. B3(4),
  980. B3(5),
  981. B3(6),
  982. B3(7),
  983. #if TBM_STRIDE == 5
  984. B4(0),
  985. B4(1),
  986. B4(2),
  987. B4(3),
  988. B4(4),
  989. B4(5),
  990. B4(6),
  991. B4(7),
  992. B4(8),
  993. B4(
  994. 9),
  995. B4(10),
  996. B4(11),
  997. B4(12),
  998. B4(13),
  999. B4(14),
  1000. B4(15),
  1001. #elif TBM_STRIDE != 4
  1002. #error "unsupported TBM_STRIDE"
  1003. #endif
  1004. };
  1005. #undef B4
  1006. #undef B3
  1007. #undef B2
  1008. #undef B1
  1009. #undef B0
  1010. #undef BIT
  1011. return (node->int_bm & ancestors[base_index(pfx, plen)]) != 0;
  1012. }
  1013. /* get pointer to TBM extending path */
  1014. static inline node_t *
  1015. tbm_ext_path(const struct tbm_node *node, unsigned pfx)
  1016. {
  1017. if ((node->ext_bm & bit(pfx)) == 0)
  1018. return NULL;
  1019. else
  1020. return &node->ptr.children[count_bits_before(node->ext_bm, pfx)];
  1021. }
  1022. /* resize TBM node child array to make space for new child node */
  1023. static node_t *
  1024. tbm_insert_ext_path(struct btrie *btrie, struct tbm_node *node, unsigned pfx)
  1025. {
  1026. unsigned nchildren = count_bits(node->ext_bm);
  1027. unsigned ci = count_bits_before(node->ext_bm, pfx);
  1028. int ndata = count_bits(node->int_bm);
  1029. node_t *old_children = node->ptr.children;
  1030. const void **old_data_beg = node->ptr.data_end - ndata;
  1031. assert((node->ext_bm & bit(pfx)) == 0);
  1032. node->ptr.children = alloc_nodes(btrie, nchildren + 1, ndata);
  1033. init_empty_node(btrie, &node->ptr.children[ci]);
  1034. node->ext_bm |= bit(pfx);
  1035. if (nchildren != 0 || ndata != 0) {
  1036. const void **data_beg = node->ptr.data_end - ndata;
  1037. memcpy(data_beg, old_data_beg,
  1038. ndata * sizeof(data_beg[0]) + ci * sizeof(node_t));
  1039. memcpy(&node->ptr.children[ci + 1], &old_children[ci],
  1040. (nchildren - ci) * sizeof(old_children[0]));
  1041. free_nodes(btrie, old_children, nchildren, ndata);
  1042. }
  1043. return &node->ptr.children[ci];
  1044. }
  1045. static inline int lc_is_terminal(const struct lc_node *node)
  1046. {
  1047. return (node->lc_flags & LC_FLAGS_IS_TERMINAL) != 0;
  1048. }
  1049. static inline unsigned lc_len(const struct lc_node *node)
  1050. {
  1051. return node->lc_flags & LC_FLAGS_LEN_MASK;
  1052. }
  1053. static inline void lc_init_flags(struct lc_node *node, int is_terminal,
  1054. unsigned len)
  1055. {
  1056. assert((len & ~LC_FLAGS_LEN_MASK) == 0);
  1057. node->lc_flags = LC_FLAGS_IS_LC | len;
  1058. if (is_terminal)
  1059. node->lc_flags |= LC_FLAGS_IS_TERMINAL;
  1060. }
  1061. static inline void lc_add_to_len(struct lc_node *node, int increment)
  1062. {
  1063. unsigned new_len = lc_len(node) + increment;
  1064. assert((new_len & ~LC_FLAGS_LEN_MASK) == 0);
  1065. node->lc_flags = (node->lc_flags & ~LC_FLAGS_LEN_MASK) | new_len;
  1066. }
  1067. static inline unsigned lc_shift(unsigned pos)
  1068. {
  1069. return pos / 8;
  1070. }
  1071. static inline unsigned lc_base(unsigned pos)
  1072. {
  1073. return 8 * lc_shift(pos);
  1074. }
  1075. static inline unsigned lc_bits(const struct lc_node *node, unsigned pos)
  1076. {
  1077. return pos % 8 + lc_len(node);
  1078. }
  1079. static inline unsigned lc_bytes(const struct lc_node *node, unsigned pos)
  1080. {
  1081. return (lc_bits(node, pos) + 7) / 8;
  1082. }
  1083. static inline unsigned lc_leading_bits(const struct lc_node *node, unsigned pos,
  1084. unsigned nbits)
  1085. {
  1086. return extract_bits(node->prefix, pos % 8, nbits);
  1087. }
  1088. /* Initialize a new terminal LC node
  1089. *
  1090. * If prefix is too long to fit in a single LC node, then a chain
  1091. * of LC nodes will be created.
  1092. */
  1093. static void init_terminal_node(struct btrie *btrie, node_t *dst, unsigned pos,
  1094. const btrie_oct_t *prefix, unsigned len, const void *data)
  1095. {
  1096. struct lc_node *node = &dst->lc_node;
  1097. unsigned nbytes = (len + 7) / 8;
  1098. while (nbytes - lc_shift(pos) > LC_BYTES_PER_NODE) {
  1099. memcpy(node->prefix, prefix + lc_shift(pos), LC_BYTES_PER_NODE);
  1100. lc_init_flags(node, 0, 8 * LC_BYTES_PER_NODE - pos % 8);
  1101. node->ptr.child = alloc_nodes(btrie, 1, 0);
  1102. pos += lc_len(node);
  1103. node = &node->ptr.child->lc_node;
  1104. btrie->n_lc_nodes++;
  1105. }
  1106. memcpy(node->prefix, prefix + lc_shift(pos), nbytes - lc_shift(pos));
  1107. lc_init_flags(node, 1, len - pos);
  1108. node->ptr.data = data;
  1109. btrie->n_lc_nodes++;
  1110. }
  1111. /* merge chains of multiple LC nodes into a single LC node, if possible.
  1112. *
  1113. * also ensure that the leading nodes in the LC chain have maximum length.
  1114. */
  1115. static void coalesce_lc_node(struct btrie *btrie, struct lc_node *node,
  1116. unsigned pos)
  1117. {
  1118. while (!lc_is_terminal(node) && lc_bits(node, pos) < 8 * LC_BYTES_PER_NODE && is_lc_node(node->ptr.child)) {
  1119. struct lc_node *child = &node->ptr.child->lc_node;
  1120. unsigned spare_bits = 8 * LC_BYTES_PER_NODE - lc_bits(node, pos);
  1121. unsigned end = pos + lc_len(node);
  1122. unsigned shift = lc_shift(end) - lc_shift(pos);
  1123. if (lc_len(child) <= spare_bits) {
  1124. /* node plus child will fit in single node - merge */
  1125. memcpy(node->prefix + shift, child->prefix, lc_bytes(child, end));
  1126. lc_init_flags(node, lc_is_terminal(child),
  1127. lc_len(node) + lc_len(child));
  1128. node->ptr = child->ptr;
  1129. free_nodes(btrie, (node_t *) child, 1, 0);
  1130. btrie->n_lc_nodes--;
  1131. }
  1132. else {
  1133. /* can't merge, but can take some of children bits */
  1134. unsigned cshift = lc_shift(end + spare_bits) - lc_shift(end);
  1135. memcpy(node->prefix + shift, child->prefix,
  1136. LC_BYTES_PER_NODE - shift);
  1137. lc_add_to_len(node, spare_bits);
  1138. if (cshift)
  1139. memmove(child->prefix, child->prefix + cshift,
  1140. lc_bytes(child, end) - cshift);
  1141. assert(lc_len(child) > spare_bits);
  1142. lc_add_to_len(child, -spare_bits);
  1143. pos += lc_len(node);
  1144. node = child;
  1145. }
  1146. }
  1147. }
  1148. static void init_tbm_node(struct btrie *btrie, node_t *node, unsigned pos,
  1149. const btrie_oct_t pbyte, const void **root_data_p, node_t *left,
  1150. node_t *right);
  1151. /* given an LC node at orig_pos, create a new (shorter) node at pos */
  1152. static void shorten_lc_node(struct btrie *btrie, node_t *dst, unsigned pos,
  1153. struct lc_node *src, unsigned orig_pos)
  1154. {
  1155. assert(orig_pos < pos);
  1156. assert(lc_len(src) >= pos - orig_pos);
  1157. assert(dst != (node_t *) src);
  1158. if (lc_len(src) == pos - orig_pos && !lc_is_terminal(src)) {
  1159. /* just steal the child */
  1160. node_t *child = src->ptr.child;
  1161. *dst = *child;
  1162. free_nodes(btrie, child, 1, 0);
  1163. btrie->n_lc_nodes--;
  1164. }
  1165. else {
  1166. struct lc_node *node = &dst->lc_node;
  1167. unsigned shift = lc_shift(pos) - lc_shift(orig_pos);
  1168. if (shift) {
  1169. memmove(node->prefix, src->prefix + shift,
  1170. lc_bytes(src, orig_pos) - shift);
  1171. node->lc_flags = src->lc_flags;
  1172. node->ptr = src->ptr;
  1173. }
  1174. else {
  1175. *node = *src;
  1176. }
  1177. lc_add_to_len(node, -(pos - orig_pos));
  1178. coalesce_lc_node(btrie, node, pos);
  1179. }
  1180. }
  1181. /* convert LC node to non-terminal LC node of length len *in place*
  1182. *
  1183. * on entry, node must have length at least len
  1184. */
  1185. static void split_lc_node(struct btrie *btrie, struct lc_node *node,
  1186. unsigned pos, unsigned len)
  1187. {
  1188. node_t *child = alloc_nodes(btrie, 1, 0);
  1189. assert(lc_len(node) >= len);
  1190. shorten_lc_node(btrie, child, pos + len, node, pos);
  1191. lc_init_flags(node, 0, len);
  1192. node->ptr.child = child;
  1193. btrie->n_lc_nodes++;
  1194. }
  1195. /* convert non-terminal LC node of length one to a TBM node *in place* */
  1196. static void convert_lc_node_1(struct btrie *btrie, struct lc_node *node,
  1197. unsigned pos)
  1198. {
  1199. btrie_oct_t pbyte = node->prefix[0];
  1200. node_t *child = node->ptr.child;
  1201. node_t *left, *right;
  1202. assert(lc_len(node) == 1);
  1203. assert(!lc_is_terminal(node));
  1204. if (extract_bit(node->prefix, pos % 8))
  1205. left = NULL, right = child;
  1206. else
  1207. left = child, right = NULL;
  1208. init_tbm_node(btrie, (node_t *) node, pos, pbyte, NULL, left, right);
  1209. free_nodes(btrie, child, 1, 0);
  1210. btrie->n_lc_nodes--;
  1211. }
  1212. /* convert an LC node to TBM node *in place* */
  1213. static void convert_lc_node(struct btrie *btrie, struct lc_node *node,
  1214. unsigned pos)
  1215. {
  1216. unsigned len = lc_len(node);
  1217. if (len >= TBM_STRIDE) {
  1218. unsigned pfx = lc_leading_bits(node, pos, TBM_STRIDE);
  1219. struct tbm_node *result = (struct tbm_node *) node;
  1220. /* split to LC of len TBM_STRIDE followed by child (extending path) */
  1221. split_lc_node(btrie, node, pos, TBM_STRIDE);
  1222. /* then convert leading LC node to TBM node */
  1223. result->int_bm = 0;
  1224. result->ext_bm = bit(pfx);
  1225. btrie->n_lc_nodes--;
  1226. btrie->n_tbm_nodes++;
  1227. }
  1228. else if (lc_is_terminal(node)) {
  1229. /* convert short terminal LC to TBM (with internal data) */
  1230. unsigned pfx = lc_leading_bits(node, pos, len);
  1231. const void *data = node->ptr.data;
  1232. node_t *result = (node_t *) node;
  1233. init_empty_node(btrie, result);
  1234. tbm_insert_data(btrie, &result->tbm_node, pfx, len, data);
  1235. btrie->n_lc_nodes--;
  1236. }
  1237. else {
  1238. assert(len > 0);
  1239. for (; len > 1; len--) {
  1240. split_lc_node(btrie, node, pos, len - 1);
  1241. convert_lc_node_1(btrie, &node->ptr.child->lc_node, pos + len - 1);
  1242. }
  1243. convert_lc_node_1(btrie, node, pos);
  1244. }
  1245. }
  1246. static void insert_lc_node(struct btrie *btrie, node_t *dst, unsigned pos,
  1247. btrie_oct_t pbyte, unsigned last_bit, node_t *tail)
  1248. {
  1249. struct lc_node *node = &dst->lc_node;
  1250. btrie_oct_t mask = 1 << (7 - (pos % 8));
  1251. btrie_oct_t bit = last_bit ? mask : 0;
  1252. if (mask != 0x01 && is_lc_node(tail)) {
  1253. /* optimization: LC tail has room for the extra bit (without shifting) */
  1254. assert((tail->lc_node.prefix[0] & mask) == bit);
  1255. *node = tail->lc_node;
  1256. lc_add_to_len(node, 1);
  1257. return;
  1258. }
  1259. /* add new leading LC node of len 1 */
  1260. node->prefix[0] = pbyte | bit;
  1261. lc_init_flags(node, 0, 1);
  1262. node->ptr.child = alloc_nodes(btrie, 1, 0);
  1263. node->ptr.child[0] = *tail;
  1264. btrie->n_lc_nodes++;
  1265. if (is_lc_node(tail))
  1266. coalesce_lc_node(btrie, node, pos);
  1267. }
  1268. /* given:
  1269. * pbyte: the bits in the prefix between lc_base(pos) and pos
  1270. * pfx: the next TBM_STRIDE bits in the prefix starting at pos
  1271. * returns:
  1272. * the bits in the prefix between lc_base(pos + plen) and pos + plen
  1273. */
  1274. static inline btrie_oct_t next_pbyte(btrie_oct_t pbyte, unsigned pos,
  1275. unsigned pfx)
  1276. {
  1277. unsigned end = pos + TBM_STRIDE;
  1278. if (end % 8 != 0) {
  1279. btrie_oct_t nbyte = (btrie_oct_t) pfx << (8 - end % 8);
  1280. if (end % 8 > TBM_STRIDE)
  1281. nbyte |= pbyte & high_bits(pos % 8);
  1282. return nbyte;
  1283. }
  1284. return 0;
  1285. }
  1286. /* construct a new TBM node, given the data and children of the
  1287. * root prefix of the new node.
  1288. */
  1289. static void init_tbm_node(struct btrie *btrie, node_t *dst, unsigned pos,
  1290. const btrie_oct_t pbyte, const void **root_data_p, node_t *left,
  1291. node_t *right)
  1292. {
  1293. struct tbm_node *node = &dst->tbm_node;
  1294. unsigned nchildren = 0;
  1295. unsigned ndata = 0;
  1296. node_t children[TBM_FANOUT];
  1297. const void *data[TBM_FANOUT - 1];
  1298. tbm_bitmap_t ext_bm = 0;
  1299. tbm_bitmap_t int_bm = 0;
  1300. unsigned i, d, pfx_base;
  1301. if (left && is_lc_node(left) && lc_len(&left->lc_node) < TBM_STRIDE)
  1302. convert_lc_node(btrie, &left->lc_node, pos + 1);
  1303. if (right && is_lc_node(right) && lc_len(&right->lc_node) < TBM_STRIDE)
  1304. convert_lc_node(btrie, &right->lc_node, pos + 1);
  1305. /* set internal data for root prefix */
  1306. if (root_data_p) {
  1307. data[ndata++] = *root_data_p;
  1308. int_bm |= bit(base_index(0, 0));
  1309. }
  1310. /* copy internal data from children */
  1311. for (d = 0; d < TBM_STRIDE - 1; d++) {
  1312. if (left && has_data(left)) {
  1313. for (i = 0; i < 1U << d; i++) {
  1314. const void **data_p = tbm_data_p(&left->tbm_node, i, d);
  1315. if (data_p) {
  1316. data[ndata++] = *data_p;
  1317. int_bm |= bit(base_index(i, d + 1));
  1318. }
  1319. }
  1320. }
  1321. if (right && has_data(right)) {
  1322. for (i = 0; i < 1U << d; i++) {
  1323. const void **data_p = tbm_data_p(&right->tbm_node, i, d);
  1324. if (data_p) {
  1325. data[ndata++] = *data_p;
  1326. int_bm |= bit(base_index(i + (1 << d), d + 1));
  1327. }
  1328. }
  1329. }
  1330. }
  1331. /* copy extending paths */
  1332. for (pfx_base = 0; pfx_base < TBM_FANOUT; pfx_base += TBM_FANOUT / 2) {
  1333. node_t *child = pfx_base ? right : left;
  1334. if (child == NULL) {
  1335. continue;
  1336. }
  1337. else if (is_lc_node(child)) {
  1338. unsigned pfx = pfx_base + lc_leading_bits(&child->lc_node, pos + 1,
  1339. TBM_STRIDE - 1);
  1340. /* child is LC node, just shorten it by TBM_STRIDE - 1 */
  1341. shorten_lc_node(btrie, &children[nchildren++], pos + TBM_STRIDE,
  1342. &child->lc_node, pos + 1);
  1343. ext_bm |= bit(pfx);
  1344. }
  1345. else if (!is_empty_node(child)) {
  1346. /* convert deepest internal prefixes of child to extending paths
  1347. * of the new node
  1348. */
  1349. for (i = 0; i < TBM_FANOUT / 2; i++) {
  1350. const void **data_p = tbm_data_p(&child->tbm_node, i,
  1351. TBM_STRIDE - 1);
  1352. node_t *left_ext = tbm_ext_path(&child->tbm_node, 2 * i);
  1353. node_t *right_ext = tbm_ext_path(&child->tbm_node, 2 * i + 1);
  1354. if (data_p || left_ext || right_ext) {
  1355. node_t *ext_path = &children[nchildren++];
  1356. unsigned pfx = pfx_base + i;
  1357. btrie_oct_t npbyte = next_pbyte(pbyte, pos, pfx);
  1358. ext_bm |= bit(pfx);
  1359. if (left_ext == NULL && right_ext == NULL) {
  1360. /* only have data - set ext_path to zero-length terminal LC node */
  1361. lc_init_flags(&ext_path->lc_node, 1, 0);
  1362. ext_path->lc_node.prefix[0] = npbyte;
  1363. ext_path->lc_node.ptr.data = *data_p;
  1364. btrie->n_lc_nodes++;
  1365. }
  1366. else if (data_p || (left_ext && right_ext)) {
  1367. /* have at least two of data, left_ext, right_ext
  1368. * ext_path must be a full TBM node */
  1369. init_tbm_node(btrie, ext_path, pos + TBM_STRIDE,
  1370. npbyte, data_p, left_ext, right_ext);
  1371. }
  1372. else if (left_ext) {
  1373. /* have only left_ext, insert length-one LC node */
  1374. insert_lc_node(btrie, ext_path, pos + TBM_STRIDE,
  1375. npbyte, 0, left_ext);
  1376. }
  1377. else {
  1378. /* have only right_ext, insert length-one LC node */
  1379. insert_lc_node(btrie, ext_path, pos + TBM_STRIDE,
  1380. npbyte, 1, right_ext);
  1381. }
  1382. }
  1383. }
  1384. btrie->n_tbm_nodes--;
  1385. free_nodes(btrie, child->tbm_node.ptr.children,
  1386. count_bits(child->tbm_node.ext_bm),
  1387. count_bits(child->tbm_node.int_bm));
  1388. }
  1389. }
  1390. assert(count_bits(int_bm) == ndata);
  1391. assert(count_bits(ext_bm) == nchildren);
  1392. node->ptr.children = alloc_nodes(btrie, nchildren, ndata);
  1393. memcpy(node->ptr.data_end - (int) ndata, data, ndata * sizeof(data[0]));
  1394. memcpy(node->ptr.children, children, nchildren * sizeof(children[0]));
  1395. node->ext_bm = ext_bm;
  1396. node->int_bm = int_bm;
  1397. btrie->n_tbm_nodes++;
  1398. }
  1399. static enum btrie_result add_to_trie(struct btrie *btrie, node_t *node,
  1400. unsigned pos, const btrie_oct_t *prefix, unsigned len, const void *data)
  1401. {
  1402. for (;;) {
  1403. if (is_lc_node(node)) {
  1404. struct lc_node *lc_node = &node->lc_node;
  1405. unsigned end = pos + lc_len(lc_node);
  1406. unsigned cbits = common_prefix(prefix + lc_shift(pos),
  1407. lc_node->prefix, (len < end ? len : end) - lc_base(pos));
  1408. unsigned clen = lc_base(pos) + cbits; /* position of first mismatch */
  1409. if (clen == end && !lc_is_terminal(lc_node)) {
  1410. /* matched entire prefix of LC node, proceed to child */
  1411. assert(lc_len(lc_node) > 0);
  1412. node = lc_node->ptr.child;
  1413. pos = end;
  1414. }
  1415. else if (clen == end && len == end && lc_is_terminal(lc_node)) {
  1416. /* exact match for terminal node - already have data for prefix */
  1417. return BTRIE_DUPLICATE_PREFIX;
  1418. }
  1419. else {
  1420. assert(clen < end || (lc_is_terminal(lc_node) && len > end));
  1421. /* Need to insert new TBM node at clen */
  1422. if (clen > pos) {
  1423. split_lc_node(btrie, lc_node, pos, clen - pos);
  1424. node = lc_node->ptr.child;
  1425. assert(is_lc_node(node));
  1426. pos = clen;
  1427. }
  1428. convert_lc_node(btrie, &node->lc_node, pos);
  1429. }
  1430. }
  1431. else if (is_empty_node(node)) {
  1432. /* at empty TBM node - just replace with terminal LC node */
  1433. init_terminal_node(btrie, node, pos, prefix, len, data);
  1434. btrie->n_entries++;
  1435. btrie->n_tbm_nodes--;
  1436. return BTRIE_OKAY;
  1437. }
  1438. else {
  1439. struct tbm_node *tbm_node = &node->tbm_node;
  1440. unsigned end = pos + TBM_STRIDE;
  1441. if (len < end) {
  1442. unsigned plen = len - pos;
  1443. unsigned pfx = extract_bits(prefix, pos, plen);
  1444. if (tbm_data_p(tbm_node, pfx, plen) != NULL)
  1445. return BTRIE_DUPLICATE_PREFIX; /* prefix already has data */
  1446. else {
  1447. tbm_insert_data(btrie, tbm_node, pfx, plen, data);
  1448. btrie->n_entries++;
  1449. return BTRIE_OKAY;
  1450. }
  1451. }
  1452. else {
  1453. unsigned pfx = extract_bits(prefix, pos, TBM_STRIDE);
  1454. /* follow extending path */
  1455. node = tbm_ext_path(tbm_node, pfx);
  1456. if (node == NULL)
  1457. node = tbm_insert_ext_path(btrie, tbm_node, pfx);
  1458. pos = end;
  1459. }
  1460. }
  1461. }
  1462. }
  1463. static const void *
  1464. search_trie(const node_t *node, unsigned pos, const btrie_oct_t *prefix,
  1465. unsigned len)
  1466. {
  1467. /* remember last TBM node seen with internal data */
  1468. const struct tbm_node *int_node = 0;
  1469. unsigned int_pfx = 0, int_plen = 0;
  1470. while (node) {
  1471. if (is_lc_node(node)) {
  1472. const struct lc_node *lc_node = &node->lc_node;
  1473. unsigned end = pos + lc_len(lc_node);
  1474. if (len < end)
  1475. break;
  1476. if (!prefixes_equal(prefix + lc_shift(pos), lc_node->prefix,
  1477. end - lc_base(pos)))
  1478. break;
  1479. if (lc_is_terminal(lc_node))
  1480. return lc_node->ptr.data; /* found terminal node */
  1481. pos = end;
  1482. node = lc_node->ptr.child;
  1483. }
  1484. else {
  1485. const struct tbm_node *tbm_node = &node->tbm_node;
  1486. unsigned end = pos + TBM_STRIDE;
  1487. if (len < end) {
  1488. unsigned plen = len - pos;
  1489. unsigned pfx = extract_bits(prefix, pos, plen);
  1490. if (has_internal_data(tbm_node, pfx, plen)) {
  1491. int_node = tbm_node;
  1492. int_pfx = pfx;
  1493. int_plen = plen;
  1494. }
  1495. break;
  1496. }
  1497. else {
  1498. unsigned pfx = extract_bits(prefix, pos, TBM_STRIDE);
  1499. if (has_internal_data(tbm_node, pfx >> 1, TBM_STRIDE - 1)) {
  1500. int_node = tbm_node;
  1501. int_pfx = pfx >> 1;
  1502. int_plen = TBM_STRIDE - 1;
  1503. }
  1504. pos = end;
  1505. node = tbm_ext_path(tbm_node, pfx);
  1506. }
  1507. }
  1508. }
  1509. if (int_node) {
  1510. const void **data_p = tbm_data_p(int_node, int_pfx, int_plen);
  1511. while (data_p == NULL) {
  1512. assert(int_plen > 0);
  1513. int_pfx >>= 1;
  1514. int_plen--;
  1515. data_p = tbm_data_p(int_node, int_pfx, int_plen);
  1516. }
  1517. return *data_p;
  1518. }
  1519. return NULL;
  1520. }
  1521. struct btrie *
  1522. btrie_init(rspamd_mempool_t *mp)
  1523. {
  1524. struct btrie *btrie;
  1525. if (!(btrie = rspamd_mempool_alloc0(mp, sizeof(*btrie)))) {
  1526. return NULL;
  1527. }
  1528. btrie->mp = mp;
  1529. btrie->alloc_total = sizeof(*btrie);
  1530. /* count the empty root node */
  1531. btrie->n_tbm_nodes = 1;
  1532. return btrie;
  1533. }
  1534. enum btrie_result btrie_add_prefix(struct btrie *btrie,
  1535. const btrie_oct_t *prefix, unsigned len, const void *data)
  1536. {
  1537. enum btrie_result rv;
  1538. if ((rv = setjmp(btrie->exception)) != 0)
  1539. return rv; /* out of memory */
  1540. return add_to_trie(btrie, &btrie->root, 0, prefix, len, data);
  1541. }
  1542. const void *
  1543. btrie_lookup(const struct btrie *btrie, const btrie_oct_t *prefix, unsigned len)
  1544. {
  1545. return search_trie(&btrie->root, 0, prefix, len);
  1546. }
  1547. /****************************************************************
  1548. *
  1549. * btrie_stats() - statistics reporting
  1550. */
  1551. #ifdef BTRIE_EXTENDED_STATS
  1552. /* Define BTRIE_EXTENDED_STATS to get extra statistics (including
  1553. * trie depth). This statistics require a traversal of the entire trie
  1554. * to compute, and so are disabled by default.
  1555. */
  1556. struct stats {
  1557. size_t max_depth;
  1558. size_t total_depth;
  1559. #ifndef NDEBUG
  1560. size_t n_lc_nodes;
  1561. size_t n_tbm_nodes;
  1562. size_t n_entries;
  1563. size_t alloc_data;
  1564. size_t alloc_waste;
  1565. #endif
  1566. };
  1567. static void
  1568. node_stats(const node_t *node, size_t depth, struct stats *stats)
  1569. {
  1570. if (depth > stats->max_depth)
  1571. stats->max_depth = depth;
  1572. stats->total_depth += depth;
  1573. if (is_lc_node(node)) {
  1574. #ifndef NDEBUG
  1575. stats->n_lc_nodes++;
  1576. #endif
  1577. if (!lc_is_terminal(&node->lc_node))
  1578. node_stats(node->lc_node.ptr.child, depth + 1, stats);
  1579. #ifndef NDEBUG
  1580. else
  1581. stats->n_entries++;
  1582. #endif
  1583. }
  1584. else {
  1585. unsigned i;
  1586. unsigned nchildren = count_bits(node->tbm_node.ext_bm);
  1587. #ifndef NDEBUG
  1588. unsigned ndata = count_bits(node->tbm_node.int_bm);
  1589. stats->n_tbm_nodes++;
  1590. stats->n_entries += ndata;
  1591. stats->alloc_data += ndata * sizeof(void *);
  1592. stats->alloc_waste += (ndata % 2) * sizeof(void *);
  1593. #endif
  1594. for (i = 0; i < nchildren; i++)
  1595. node_stats(&node->tbm_node.ptr.children[i], depth + 1, stats);
  1596. }
  1597. }
  1598. #endif /* BTRIE_EXTENDED_STATS */
  1599. #ifndef NDEBUG
  1600. static size_t count_free(const struct btrie *btrie)
  1601. {
  1602. size_t total = 0;
  1603. unsigned sz;
  1604. for (sz = 1; sz <= MAX_CHILD_ARRAY_LEN; sz++) {
  1605. const struct free_hunk *free = btrie->free_list[sz - 1];
  1606. size_t n;
  1607. for (n = 0; free; n++)
  1608. free = free->next;
  1609. total += sz * n;
  1610. }
  1611. return total * sizeof(node_t);
  1612. }
  1613. #endif /* not NDEBUG */
  1614. const char *
  1615. btrie_stats(const struct btrie *btrie, unsigned int duplicates)
  1616. {
  1617. static char buf[128];
  1618. size_t n_nodes = btrie->n_lc_nodes + btrie->n_tbm_nodes;
  1619. size_t alloc_free = (btrie->alloc_total + sizeof(node_t) /* do not double-count the root node */
  1620. - n_nodes * sizeof(node_t) - btrie->alloc_data - btrie->alloc_waste - sizeof(*btrie));
  1621. #ifdef BTRIE_EXTENDED_STATS
  1622. struct stats stats;
  1623. double average_depth;
  1624. memset(&stats, 0, sizeof(stats));
  1625. node_stats(&btrie->root, 0, &stats);
  1626. average_depth = (double) stats.total_depth / n_nodes;
  1627. #ifndef NDEBUG
  1628. /* check the node counts */
  1629. assert(stats.n_lc_nodes == btrie->n_lc_nodes);
  1630. assert(stats.n_tbm_nodes == btrie->n_tbm_nodes);
  1631. assert(stats.n_entries == btrie->n_entries);
  1632. assert(stats.alloc_data == btrie->alloc_data);
  1633. assert(stats.alloc_waste == btrie->alloc_waste);
  1634. #endif /* not NDEBUG */
  1635. #endif /* BTRIE_EXTENDED_STATS */
  1636. #ifndef NDEBUG
  1637. /* check that we haven't lost any memory */
  1638. assert(alloc_free == count_free(btrie));
  1639. #endif
  1640. #ifdef BTRIE_DEBUG_ALLOC
  1641. dump_alloc_hist(btrie);
  1642. #endif
  1643. #ifdef BTRIE_EXTENDED_STATS
  1644. snprintf(buf, sizeof(buf),
  1645. "ents=%lu tbm=%lu lc=%lu mem=%.0fk free=%lu waste=%lu"
  1646. " depth=%.1f/%lu",
  1647. (long unsigned) btrie->n_entries, (long unsigned) btrie->n_tbm_nodes,
  1648. (long unsigned) btrie->n_lc_nodes, (double) btrie->alloc_total / 1024,
  1649. (long unsigned) alloc_free, (long unsigned) btrie->alloc_waste, average_depth, (long unsigned) stats.max_depth);
  1650. #else
  1651. snprintf(buf, sizeof(buf),
  1652. "ents=%lu dup=%u tbm=%lu lc=%lu mem=%.0fk free=%lu waste=%lu",
  1653. (long unsigned) btrie->n_entries,
  1654. duplicates,
  1655. (long unsigned) btrie->n_tbm_nodes,
  1656. (long unsigned) btrie->n_lc_nodes, (double) btrie->alloc_total / 1024,
  1657. (long unsigned) alloc_free, (long unsigned) btrie->alloc_waste);
  1658. #endif
  1659. buf[sizeof(buf) - 1] = '\0';
  1660. return buf;
  1661. }
  1662. /****************************************************************/
  1663. #ifndef NO_MASTER_DUMP
  1664. struct walk_context {
  1665. btrie_walk_cb_t *callback;
  1666. void *user_data;
  1667. btrie_oct_t prefix[(BTRIE_MAX_PREFIX + 7) / 8];
  1668. };
  1669. static void
  1670. walk_node(const node_t *node, unsigned pos, struct walk_context *ctx);
  1671. static void walk_tbm_node(const struct tbm_node *node, unsigned pos,
  1672. unsigned pfx, unsigned plen, struct walk_context *ctx)
  1673. {
  1674. btrie_oct_t *prefix = ctx->prefix;
  1675. int pbyte = pos / 8;
  1676. btrie_oct_t pbit = 0x80 >> (pos % 8);
  1677. const void **data_p = tbm_data_p(node, pfx, plen);
  1678. if (pos >= BTRIE_MAX_PREFIX) {
  1679. /* This can/should not happen, but don't overwrite buffers if it does. */
  1680. return;
  1681. }
  1682. if (data_p)
  1683. ctx->callback(prefix, pos, *data_p, 0, ctx->user_data);
  1684. /* walk children */
  1685. if (plen < TBM_STRIDE - 1) {
  1686. /* children are internal prefixes in same node */
  1687. walk_tbm_node(node, pos + 1, pfx << 1, plen + 1, ctx);
  1688. prefix[pbyte] |= pbit;
  1689. walk_tbm_node(node, pos + 1, (pfx << 1) + 1, plen + 1, ctx);
  1690. prefix[pbyte] &= ~pbit;
  1691. }
  1692. else {
  1693. /* children are extending paths */
  1694. const node_t *ext_path;
  1695. if ((ext_path = tbm_ext_path(node, pfx << 1)) != NULL)
  1696. walk_node(ext_path, pos + 1, ctx);
  1697. if ((ext_path = tbm_ext_path(node, (pfx << 1) + 1)) != NULL) {
  1698. prefix[pbyte] |= pbit;
  1699. walk_node(ext_path, pos + 1, ctx);
  1700. prefix[pbyte] &= ~pbit;
  1701. }
  1702. }
  1703. if (data_p)
  1704. ctx->callback(prefix, pos, *data_p, 1, ctx->user_data);
  1705. }
  1706. static void walk_lc_node(const struct lc_node *node, unsigned pos,
  1707. struct walk_context *ctx)
  1708. {
  1709. btrie_oct_t *prefix = ctx->prefix;
  1710. unsigned end = pos + lc_len(node);
  1711. btrie_oct_t save_prefix = prefix[lc_shift(pos)];
  1712. if (end > BTRIE_MAX_PREFIX) {
  1713. /* This can/should not happen, but don't overwrite buffers if it does. */
  1714. return;
  1715. }
  1716. /* construct full prefix to node */
  1717. memcpy(&prefix[lc_shift(pos)], node->prefix, lc_bytes(node, pos));
  1718. if (end % 8)
  1719. prefix[end / 8] &= high_bits(end % 8);
  1720. if (lc_is_terminal(node)) {
  1721. ctx->callback(prefix, end, node->ptr.data, 0, ctx->user_data);
  1722. ctx->callback(prefix, end, node->ptr.data, 1, ctx->user_data);
  1723. }
  1724. else
  1725. walk_node(node->ptr.child, end, ctx);
  1726. prefix[lc_shift(pos)] = save_prefix; /* restore parents prefix */
  1727. if (lc_bytes(node, pos) > 1)
  1728. memset(&prefix[lc_shift(pos) + 1], 0, lc_bytes(node, pos) - 1);
  1729. }
  1730. static void walk_node(const node_t *node, unsigned pos,
  1731. struct walk_context *ctx)
  1732. {
  1733. if (is_lc_node(node))
  1734. walk_lc_node(&node->lc_node, pos, ctx);
  1735. else
  1736. walk_tbm_node(&node->tbm_node, pos, 0, 0, ctx);
  1737. }
  1738. /* walk trie in lexicographical order
  1739. *
  1740. * calls callback twice (once preorder, once postorder) at each prefix
  1741. */
  1742. void btrie_walk(const struct btrie *btrie, btrie_walk_cb_t *callback,
  1743. void *user_data)
  1744. {
  1745. struct walk_context ctx;
  1746. memset(&ctx, 0, sizeof(ctx));
  1747. ctx.callback = callback;
  1748. ctx.user_data = user_data;
  1749. walk_node(&btrie->root, 0, &ctx);
  1750. }
  1751. #endif /* not NO_MASTER_DUMP */
  1752. #ifdef TEST
  1753. /*****************************************************************
  1754. *
  1755. * Unit tests
  1756. *
  1757. */
  1758. #include <stdio.h>
  1759. #ifndef UNUSED
  1760. #define UNUSED __attribute__((unused))
  1761. #endif
  1762. /* bogus replacements mp_alloc for running self-tests */
  1763. void *
  1764. mp_alloc(UNUSED struct mempool *mp, unsigned sz, UNUSED int align)
  1765. {
  1766. return malloc(sz);
  1767. }
  1768. #if 0
  1769. #define PASS(name) puts("OK " name)
  1770. #else
  1771. #define PASS(name) \
  1772. fputs(".", stdout); \
  1773. fflush(stdout)
  1774. #endif
  1775. const char *pgm_name = "???";
  1776. static void
  1777. test_struct_node_packing()
  1778. {
  1779. node_t node;
  1780. assert(sizeof(struct tbm_node) == 2 * sizeof(void *));
  1781. assert(sizeof(struct lc_node) == 2 * sizeof(void *));
  1782. assert(sizeof(node_t) == 2 * sizeof(void *));
  1783. /* The lc_node bit must be an alias for bit zero of int_bm, since
  1784. * that is the only unused bit in the TBM node structure.
  1785. */
  1786. memset(&node, 0, sizeof(node));
  1787. assert(node.tbm_node.int_bm == 0);
  1788. lc_init_flags(&node.lc_node, 0, 0);
  1789. assert(node.tbm_node.int_bm == bit(0));
  1790. PASS("test_struct_node_packing");
  1791. }
  1792. static void
  1793. test_bit()
  1794. {
  1795. tbm_bitmap_t ones = ~(tbm_bitmap_t) 0;
  1796. tbm_bitmap_t high_bit = ones ^ (ones >> 1);
  1797. assert(bit(0) == high_bit);
  1798. assert(bit(1) == high_bit >> 1);
  1799. assert(bit(8 * sizeof(tbm_bitmap_t) - 1) == 1);
  1800. PASS("test_bit");
  1801. }
  1802. static void
  1803. test_count_bits()
  1804. {
  1805. unsigned max_bits = sizeof(tbm_bitmap_t) * 8;
  1806. tbm_bitmap_t ones = ~(tbm_bitmap_t) 0;
  1807. assert(count_bits(0) == 0);
  1808. assert(count_bits(1) == 1);
  1809. assert(count_bits(2) == 1);
  1810. assert(count_bits(3) == 2);
  1811. assert(count_bits(ones) == max_bits);
  1812. assert(count_bits(~1) == max_bits - 1);
  1813. /* count_bits(0x5555....) */
  1814. assert(count_bits(ones / 3) == max_bits / 2);
  1815. /* count_bits(0x3333...) */
  1816. assert(count_bits(ones / 5) == max_bits / 2);
  1817. /* count_bits(0x0f0f...) */
  1818. assert(count_bits(ones / 17) == max_bits / 2);
  1819. /* count_bits(0x1010...) */
  1820. assert(count_bits(ones / 255) == max_bits / 8);
  1821. PASS("test_count_bits");
  1822. }
  1823. static void
  1824. test_count_bits_before()
  1825. {
  1826. unsigned max_bits = sizeof(tbm_bitmap_t) * 8;
  1827. tbm_bitmap_t ones = ~(tbm_bitmap_t) 0;
  1828. unsigned i;
  1829. for (i = 0; i < max_bits; i++) {
  1830. assert(count_bits_before(0, i) == 0);
  1831. assert(count_bits_before(ones, i) == i);
  1832. }
  1833. PASS("test_count_bits_before");
  1834. }
  1835. static void
  1836. test_count_bits_from()
  1837. {
  1838. unsigned max_bits = sizeof(tbm_bitmap_t) * 8;
  1839. tbm_bitmap_t ones = ~(tbm_bitmap_t) 0;
  1840. unsigned i;
  1841. for (i = 0; i < max_bits; i++) {
  1842. assert(count_bits_from(0, i) == 0);
  1843. assert(count_bits_from(ones, i) == max_bits - i);
  1844. }
  1845. PASS("test_count_bits_from");
  1846. }
  1847. static void
  1848. test_extract_bits()
  1849. {
  1850. static btrie_oct_t prefix[] = {0xff, 0x55, 0xaa, 0x00};
  1851. unsigned i;
  1852. for (i = 0; i < 32; i++)
  1853. assert(extract_bits(prefix, i, 0) == 0);
  1854. for (i = 0; i < 8; i++)
  1855. assert(extract_bits(prefix, i, 1) == 1);
  1856. for (i = 8; i < 16; i++)
  1857. assert(extract_bits(prefix, i, 1) == i % 2);
  1858. for (i = 16; i < 24; i++)
  1859. assert(extract_bits(prefix, i, 1) == (i + 1) % 2);
  1860. for (i = 24; i < 32; i++)
  1861. assert(extract_bits(prefix, i, 1) == 0);
  1862. assert(extract_bits(prefix, 2, 6) == 0x3f);
  1863. assert(extract_bits(prefix, 3, 6) == 0x3e);
  1864. assert(extract_bits(prefix, 4, 6) == 0x3d);
  1865. assert(extract_bits(prefix, 5, 6) == 0x3a);
  1866. assert(extract_bits(prefix, 6, 6) == 0x35);
  1867. assert(extract_bits(prefix, 7, 6) == 0x2a);
  1868. assert(extract_bits(prefix, 8, 6) == 0x15);
  1869. PASS("test_extract_bits");
  1870. }
  1871. static void
  1872. test_high_bits()
  1873. {
  1874. assert(high_bits(0) == 0x00);
  1875. assert(high_bits(1) == 0x80);
  1876. assert(high_bits(2) == 0xc0);
  1877. assert(high_bits(3) == 0xe0);
  1878. assert(high_bits(4) == 0xf0);
  1879. assert(high_bits(5) == 0xf8);
  1880. assert(high_bits(6) == 0xfc);
  1881. assert(high_bits(7) == 0xfe);
  1882. assert(high_bits(8) == 0xff);
  1883. PASS("test_high_bits");
  1884. }
  1885. static void
  1886. test_prefixes_equal()
  1887. {
  1888. btrie_oct_t prefix1[LC_BYTES_PER_NODE];
  1889. btrie_oct_t prefix2[LC_BYTES_PER_NODE];
  1890. unsigned i;
  1891. memset(prefix1, 0xaa, LC_BYTES_PER_NODE);
  1892. memset(prefix2, 0xaa, LC_BYTES_PER_NODE);
  1893. for (i = 0; i < 8 * LC_BYTES_PER_NODE; i++) {
  1894. assert(prefixes_equal(prefix1, prefix2, i));
  1895. prefix1[i / 8] ^= 1 << (7 - i % 8);
  1896. assert(!prefixes_equal(prefix1, prefix2, 8 * LC_BYTES_PER_NODE));
  1897. assert(prefixes_equal(prefix1, prefix2, i));
  1898. if (i + 1 < 8 * LC_BYTES_PER_NODE)
  1899. assert(!prefixes_equal(prefix1, prefix2, i + 1));
  1900. prefix1[i / 8] ^= 1 << (7 - i % 8);
  1901. }
  1902. PASS("test_prefixes_equal");
  1903. }
  1904. static void
  1905. test_common_prefix()
  1906. {
  1907. btrie_oct_t prefix1[LC_BYTES_PER_NODE];
  1908. btrie_oct_t prefix2[LC_BYTES_PER_NODE];
  1909. unsigned i;
  1910. memset(prefix1, 0x55, LC_BYTES_PER_NODE);
  1911. memset(prefix2, 0x55, LC_BYTES_PER_NODE);
  1912. for (i = 0; i < 8 * LC_BYTES_PER_NODE; i++) {
  1913. assert(common_prefix(prefix1, prefix2, i) == i);
  1914. prefix1[i / 8] ^= 1 << (7 - i % 8);
  1915. assert(common_prefix(prefix1, prefix2, 8 * LC_BYTES_PER_NODE) == i);
  1916. if (i + 1 < 8 * LC_BYTES_PER_NODE)
  1917. assert(common_prefix(prefix1, prefix2, i + 1) == i);
  1918. prefix1[i / 8] ^= 1 << (7 - i % 8);
  1919. }
  1920. PASS("test_common_prefix");
  1921. }
  1922. static void
  1923. test_base_index()
  1924. {
  1925. assert(base_index(0, 0) == 1);
  1926. assert(base_index(0, 1) == 2);
  1927. assert(base_index(1, 1) == 3);
  1928. assert(base_index(0, 2) == 4);
  1929. assert(base_index(1, 2) == 5);
  1930. assert(base_index(2, 2) == 6);
  1931. assert(base_index(3, 2) == 7);
  1932. PASS("test_base_index");
  1933. }
  1934. static void
  1935. test_has_internal_data()
  1936. {
  1937. struct tbm_node node;
  1938. unsigned plen, pfx, bi;
  1939. for (plen = 0; plen < TBM_STRIDE; plen++) {
  1940. for (pfx = 0; pfx < 1U << plen; pfx++) {
  1941. tbm_bitmap_t ancestor_mask = 0;
  1942. for (bi = base_index(pfx, plen); bi; bi >>= 1) {
  1943. node.int_bm = bit(bi);
  1944. ancestor_mask |= bit(bi);
  1945. assert(has_internal_data(&node, pfx, plen));
  1946. }
  1947. node.int_bm = ~ancestor_mask;
  1948. assert(!has_internal_data(&node, pfx, plen));
  1949. }
  1950. }
  1951. PASS("test_has_internal_data");
  1952. }
  1953. /****************************************************************/
  1954. static const btrie_oct_t numbered_bytes[] = {
  1955. 0x00,
  1956. 0x01,
  1957. 0x02,
  1958. 0x03,
  1959. 0x04,
  1960. 0x05,
  1961. 0x06,
  1962. 0x07,
  1963. 0x08,
  1964. 0x09,
  1965. 0x0a,
  1966. 0x0b,
  1967. 0x0c,
  1968. 0x0d,
  1969. 0x0e,
  1970. 0x0f,
  1971. 0x10,
  1972. 0x11,
  1973. 0x12,
  1974. 0x13,
  1975. 0x14,
  1976. 0x15,
  1977. 0x16,
  1978. 0x17,
  1979. 0x18,
  1980. 0x19,
  1981. 0x1a,
  1982. 0x1b,
  1983. 0x1c,
  1984. 0x1d,
  1985. 0x1e,
  1986. 0x1f,
  1987. 0x20,
  1988. 0x21,
  1989. 0x22,
  1990. 0x23,
  1991. 0x24,
  1992. 0x25,
  1993. 0x26,
  1994. 0x27,
  1995. 0x28,
  1996. 0x29,
  1997. 0x2a,
  1998. 0x2b,
  1999. 0x2c,
  2000. 0x2d,
  2001. 0x2e,
  2002. 0x2f,
  2003. 0x30,
  2004. 0x31,
  2005. 0x32,
  2006. 0x33,
  2007. 0x34,
  2008. 0x35,
  2009. 0x36,
  2010. 0x37,
  2011. 0x38,
  2012. 0x39,
  2013. 0x3a,
  2014. 0x3b,
  2015. 0x3c,
  2016. 0x3d,
  2017. 0x3e,
  2018. 0x3f,
  2019. };
  2020. static void
  2021. check_non_terminal_lc_node(struct lc_node *node, unsigned len)
  2022. {
  2023. assert(is_lc_node((node_t *) node));
  2024. assert(!lc_is_terminal(node));
  2025. assert(lc_len(node) == len);
  2026. }
  2027. static void
  2028. check_terminal_lc_node(struct lc_node *node, unsigned len, const void *data)
  2029. {
  2030. assert(is_lc_node((node_t *) node));
  2031. assert(lc_is_terminal(node));
  2032. assert(lc_len(node) == len);
  2033. assert(node->ptr.data == data);
  2034. }
  2035. static void
  2036. test_init_terminal_node()
  2037. {
  2038. struct btrie *btrie = btrie_init(NULL);
  2039. const void *data = (void *) 0xdeadbeef;
  2040. node_t node;
  2041. struct lc_node *head = &node.lc_node;
  2042. init_terminal_node(btrie, &node, 0,
  2043. numbered_bytes, 8 * LC_BYTES_PER_NODE, data);
  2044. check_terminal_lc_node(head, 8 * LC_BYTES_PER_NODE, data);
  2045. assert(memcmp(head->prefix, numbered_bytes, LC_BYTES_PER_NODE) == 0);
  2046. init_terminal_node(btrie, &node, 7,
  2047. numbered_bytes, 8 * LC_BYTES_PER_NODE, data);
  2048. check_terminal_lc_node(head, 8 * LC_BYTES_PER_NODE - 7, data);
  2049. assert(memcmp(head->prefix, numbered_bytes, LC_BYTES_PER_NODE) == 0);
  2050. init_terminal_node(btrie, &node, 0,
  2051. numbered_bytes, 2 * 8 * LC_BYTES_PER_NODE, data);
  2052. check_non_terminal_lc_node(head, 8 * LC_BYTES_PER_NODE);
  2053. assert(memcmp(head->prefix, numbered_bytes, LC_BYTES_PER_NODE) == 0);
  2054. {
  2055. struct lc_node *child = &head->ptr.child->lc_node;
  2056. check_terminal_lc_node(child, 8 * LC_BYTES_PER_NODE, data);
  2057. assert(memcmp(child->prefix, &numbered_bytes[LC_BYTES_PER_NODE],
  2058. LC_BYTES_PER_NODE) == 0);
  2059. }
  2060. init_terminal_node(btrie, &node, 15,
  2061. numbered_bytes, 8 * LC_BYTES_PER_NODE + 15, data);
  2062. check_non_terminal_lc_node(head, 8 * LC_BYTES_PER_NODE - 7);
  2063. assert(memcmp(head->prefix, &numbered_bytes[1], LC_BYTES_PER_NODE) == 0);
  2064. {
  2065. struct lc_node *child = &head->ptr.child->lc_node;
  2066. check_terminal_lc_node(child, 7, data);
  2067. assert(child->prefix[0] == numbered_bytes[LC_BYTES_PER_NODE + 1]);
  2068. }
  2069. PASS("test_init_terminal_node");
  2070. }
  2071. static void
  2072. test_coalesce_lc_node()
  2073. {
  2074. struct btrie *btrie = btrie_init(NULL);
  2075. const void *data = (void *) 0xdeadbeef;
  2076. node_t node;
  2077. struct lc_node *head = &node.lc_node;
  2078. /* test merging */
  2079. init_terminal_node(btrie, &node, 0,
  2080. numbered_bytes, 8 * (LC_BYTES_PER_NODE + 1), data);
  2081. check_non_terminal_lc_node(head, LC_BYTES_PER_NODE * 8);
  2082. lc_add_to_len(head, -8);
  2083. coalesce_lc_node(btrie, head, 8);
  2084. check_terminal_lc_node(head, LC_BYTES_PER_NODE * 8, data);
  2085. assert(head->prefix[LC_BYTES_PER_NODE - 1] == numbered_bytes[LC_BYTES_PER_NODE]);
  2086. /* test bit stealing */
  2087. init_terminal_node(btrie, &node, 0,
  2088. numbered_bytes, 8 * (2 * LC_BYTES_PER_NODE), data);
  2089. check_non_terminal_lc_node(head, LC_BYTES_PER_NODE * 8);
  2090. lc_add_to_len(head, -15);
  2091. coalesce_lc_node(btrie, head, 15);
  2092. check_non_terminal_lc_node(head, LC_BYTES_PER_NODE * 8 - 7);
  2093. assert(memcmp(head->prefix, numbered_bytes, LC_BYTES_PER_NODE - 1) == 0);
  2094. assert(head->prefix[LC_BYTES_PER_NODE - 1] == numbered_bytes[LC_BYTES_PER_NODE]);
  2095. {
  2096. struct lc_node *child = &head->ptr.child->lc_node;
  2097. check_terminal_lc_node(child, 8 * (LC_BYTES_PER_NODE - 1), data);
  2098. assert(memcmp(child->prefix, &numbered_bytes[LC_BYTES_PER_NODE + 1],
  2099. LC_BYTES_PER_NODE - 1) == 0);
  2100. }
  2101. PASS("test_coalesce_lc_node");
  2102. }
  2103. static void
  2104. test_shorten_lc_node()
  2105. {
  2106. struct btrie *btrie = btrie_init(NULL);
  2107. const void *data = (void *) 0xdeadbeef;
  2108. node_t node, shorter;
  2109. /* test shorten without shift */
  2110. init_terminal_node(btrie, &node, 0,
  2111. numbered_bytes, 8 * LC_BYTES_PER_NODE, data);
  2112. memset(shorter.lc_node.prefix, 0xff, LC_BYTES_PER_NODE);
  2113. shorten_lc_node(btrie, &shorter, 7, &node.lc_node, 0);
  2114. check_terminal_lc_node(&shorter.lc_node, LC_BYTES_PER_NODE * 8 - 7, data);
  2115. assert(memcmp(shorter.lc_node.prefix, numbered_bytes, LC_BYTES_PER_NODE) == 0);
  2116. /* test shorten with shift */
  2117. init_terminal_node(btrie, &node, 7,
  2118. numbered_bytes, 8 * LC_BYTES_PER_NODE, data);
  2119. memset(shorter.lc_node.prefix, 0xff, LC_BYTES_PER_NODE);
  2120. shorten_lc_node(btrie, &shorter, 9, &node.lc_node, 7);
  2121. check_terminal_lc_node(&shorter.lc_node, LC_BYTES_PER_NODE * 8 - 9, data);
  2122. assert(memcmp(shorter.lc_node.prefix, &numbered_bytes[1],
  2123. LC_BYTES_PER_NODE - 1) == 0);
  2124. {
  2125. /* test child stealing */
  2126. struct lc_node head;
  2127. node_t tail, shorter;
  2128. lc_init_flags(&head, 0, 7);
  2129. head.ptr.child = &tail;
  2130. init_empty_node(btrie, &tail);
  2131. shorten_lc_node(btrie, &shorter, 7, &head, 0);
  2132. assert(is_empty_node(&shorter));
  2133. }
  2134. PASS("test_shorten_lc_node");
  2135. }
  2136. static void
  2137. test_split_lc_node()
  2138. {
  2139. struct btrie *btrie = btrie_init(NULL);
  2140. const void *data = (void *) 0xdeadbeef;
  2141. struct lc_node node;
  2142. init_terminal_node(btrie, (node_t *) &node, 1, numbered_bytes, 25, data);
  2143. split_lc_node(btrie, &node, 1, 8);
  2144. check_non_terminal_lc_node(&node, 8);
  2145. check_terminal_lc_node(&node.ptr.child->lc_node, 16, data);
  2146. /* test conversion of terminal to non-terminal */
  2147. init_terminal_node(btrie, (node_t *) &node, 7, numbered_bytes, 10, data);
  2148. split_lc_node(btrie, &node, 7, 3);
  2149. check_non_terminal_lc_node(&node, 3);
  2150. check_terminal_lc_node(&node.ptr.child->lc_node, 0, data);
  2151. PASS("test_split_lc_node");
  2152. }
  2153. static void
  2154. test_convert_lc_node_1()
  2155. {
  2156. struct btrie *btrie = btrie_init(NULL);
  2157. const void *data = (void *) 0xdeadbeef;
  2158. struct lc_node head;
  2159. /* test tail is left */
  2160. lc_init_flags(&head, 0, 1);
  2161. head.prefix[0] = 0;
  2162. head.ptr.child = alloc_nodes(btrie, 1, 0);
  2163. init_terminal_node(btrie, head.ptr.child, 1, numbered_bytes, 1, data);
  2164. convert_lc_node_1(btrie, &head, 0);
  2165. {
  2166. node_t *result = (node_t *) &head;
  2167. assert(is_tbm_node(result));
  2168. assert(result->tbm_node.ext_bm == 0);
  2169. assert(result->tbm_node.int_bm == bit(base_index(0, 1)));
  2170. assert(*tbm_data_p(&result->tbm_node, 0, 1) == data);
  2171. }
  2172. /* test tail is right */
  2173. lc_init_flags(&head, 0, 1);
  2174. head.prefix[0] = 1;
  2175. head.ptr.child = alloc_nodes(btrie, 1, 0);
  2176. init_terminal_node(btrie, head.ptr.child, 8, numbered_bytes, 10, data);
  2177. convert_lc_node_1(btrie, &head, 7);
  2178. {
  2179. node_t *result = (node_t *) &head;
  2180. assert(is_tbm_node(result));
  2181. assert(result->tbm_node.ext_bm == 0);
  2182. assert(result->tbm_node.int_bm == bit(base_index(4, 3)));
  2183. assert(*tbm_data_p(&result->tbm_node, 4, 3) == data);
  2184. }
  2185. PASS("test_convert_lc_node_1");
  2186. }
  2187. static void
  2188. test_convert_lc_node()
  2189. {
  2190. struct btrie *btrie = btrie_init(NULL);
  2191. const void *data = (void *) 0xdeadbeef;
  2192. node_t node;
  2193. /* if (len >= TBM_STRIDE) */
  2194. init_terminal_node(btrie, &node, 7, numbered_bytes, TBM_STRIDE + 7, data);
  2195. convert_lc_node(btrie, &node.lc_node, 7);
  2196. assert(is_tbm_node(&node));
  2197. assert(node.tbm_node.ext_bm == bit(0));
  2198. assert(node.tbm_node.int_bm == 0);
  2199. check_terminal_lc_node(&tbm_ext_path(&node.tbm_node, 0)->lc_node, 0, data);
  2200. /* if (lc_is_terminal(node)) */
  2201. init_terminal_node(btrie, &node, 0, numbered_bytes, 0, data);
  2202. convert_lc_node(btrie, &node.lc_node, 0);
  2203. assert(is_tbm_node(&node));
  2204. assert(node.tbm_node.ext_bm == 0);
  2205. assert(node.tbm_node.int_bm == bit(base_index(0, 0)));
  2206. assert(*tbm_data_p(&node.tbm_node, 0, 0) == data);
  2207. /* else */
  2208. lc_init_flags(&node.lc_node, 0, TBM_STRIDE - 1);
  2209. node.lc_node.prefix[0] = 0;
  2210. node.lc_node.ptr.child = alloc_nodes(btrie, 1, 0);
  2211. init_empty_node(btrie, node.lc_node.ptr.child);
  2212. tbm_insert_data(btrie, &node.lc_node.ptr.child->tbm_node, 0, 0, data);
  2213. convert_lc_node(btrie, &node.lc_node, 0);
  2214. assert(is_tbm_node(&node));
  2215. assert(node.tbm_node.ext_bm == 0);
  2216. assert(node.tbm_node.int_bm == bit(base_index(0, TBM_STRIDE - 1)));
  2217. assert(*tbm_data_p(&node.tbm_node, 0, TBM_STRIDE - 1) == data);
  2218. PASS("test_convert_lc_node");
  2219. }
  2220. static void
  2221. test_insert_lc_node()
  2222. {
  2223. struct btrie *btrie = btrie_init(NULL);
  2224. const void *data = (void *) 0xdeadbeef;
  2225. node_t node, tail;
  2226. /* test optimized case, last_bit == 0 */
  2227. init_terminal_node(btrie, &tail, 9, numbered_bytes, 17, data);
  2228. insert_lc_node(btrie, &node, 8, 0, 0, &tail);
  2229. check_terminal_lc_node(&node.lc_node, 9, data);
  2230. assert(memcmp(node.lc_node.prefix, &numbered_bytes[1], 2) == 0);
  2231. /* test optimized case, last_bit == 1 */
  2232. init_terminal_node(btrie, &tail, 7, &numbered_bytes[0x12], 15, data);
  2233. insert_lc_node(btrie, &node, 6, 0x10, 1, &tail);
  2234. check_terminal_lc_node(&node.lc_node, 9, data);
  2235. assert(node.lc_node.prefix[0] == 0x12);
  2236. assert(node.lc_node.prefix[1] == 0x13);
  2237. /* test with shift */
  2238. init_terminal_node(btrie, &tail, 0, numbered_bytes, 8, data);
  2239. insert_lc_node(btrie, &node, 7, 0x40, 1, &tail);
  2240. check_terminal_lc_node(&node.lc_node, 9, data);
  2241. assert(node.lc_node.prefix[0] == 0x41);
  2242. assert(node.lc_node.prefix[1] == numbered_bytes[0]);
  2243. /* test with TBM node */
  2244. init_empty_node(btrie, &tail);
  2245. insert_lc_node(btrie, &node, 6, 0x40, 0, &tail);
  2246. check_non_terminal_lc_node(&node.lc_node, 1);
  2247. assert(is_tbm_node(node.lc_node.ptr.child));
  2248. PASS("test_insert_lc_node");
  2249. }
  2250. static void
  2251. test_next_pbyte()
  2252. {
  2253. assert(next_pbyte(0xff, 0, 1) == 0x80 >> (TBM_STRIDE - 1));
  2254. assert(next_pbyte(0xff, 1, 1) == (0x80 | (0x80 >> TBM_STRIDE)));
  2255. assert(next_pbyte(0xff, 2, 1) == (0xc0 | (0x80 >> (TBM_STRIDE + 1))));
  2256. assert(next_pbyte(0xff, 8 - TBM_STRIDE, 1) == 0);
  2257. assert(next_pbyte(0xff, 9 - TBM_STRIDE, 1) == 0x80);
  2258. PASS("test_next_pbyte");
  2259. }
  2260. static void
  2261. test_init_tbm_node()
  2262. {
  2263. struct btrie *btrie = btrie_init(NULL);
  2264. const void *data = (void *) 0xdeadbeef;
  2265. unsigned lr;
  2266. node_t node;
  2267. /* test root data */
  2268. init_tbm_node(btrie, &node, 0, 0, &data, NULL, NULL);
  2269. assert(is_tbm_node(&node));
  2270. assert(node.tbm_node.ext_bm == 0);
  2271. assert(node.tbm_node.int_bm == bit(base_index(0, 0)));
  2272. assert(*tbm_data_p(&node.tbm_node, 0, 0) == data);
  2273. for (lr = 0; lr < 2; lr++) {
  2274. node_t child;
  2275. node_t *left = lr ? NULL : &child;
  2276. node_t *right = lr ? &child : NULL;
  2277. unsigned base = lr ? (1U << (TBM_STRIDE - 1)) : 0;
  2278. unsigned pfx;
  2279. /* test with long LC node child */
  2280. init_terminal_node(btrie, &child, 1, numbered_bytes, TBM_STRIDE + 1, data);
  2281. init_tbm_node(btrie, &node, 0, 0, NULL, left, right);
  2282. assert(is_tbm_node(&node));
  2283. assert(node.tbm_node.ext_bm == bit(base));
  2284. assert(node.tbm_node.int_bm == 0);
  2285. check_terminal_lc_node(&tbm_ext_path(&node.tbm_node, base)->lc_node,
  2286. 1, data);
  2287. /* test with short LC node children */
  2288. init_terminal_node(btrie, &child, 1, numbered_bytes, TBM_STRIDE - 1, data);
  2289. init_tbm_node(btrie, &node, 0, 0, NULL, left, right);
  2290. assert(is_tbm_node(&node));
  2291. assert(node.tbm_node.ext_bm == 0);
  2292. assert(node.tbm_node.int_bm == bit(base_index(base >> 1, TBM_STRIDE - 1)));
  2293. assert(*tbm_data_p(&node.tbm_node, base >> 1, TBM_STRIDE - 1) == data);
  2294. /* construct TBM node with all eight combinations of having data,
  2295. * left_ext and/or right_ext in its extending paths */
  2296. init_empty_node(btrie, &child);
  2297. for (pfx = 0; pfx < 8; pfx++) {
  2298. if (pfx & 1)
  2299. tbm_insert_data(btrie, &child.tbm_node, pfx, TBM_STRIDE - 1, data);
  2300. if (pfx & 2) {
  2301. btrie_oct_t prefix0 = 0;
  2302. init_terminal_node(btrie,
  2303. tbm_insert_ext_path(btrie, &child.tbm_node, 2 * pfx),
  2304. TBM_STRIDE + 1,
  2305. &prefix0, TBM_STRIDE + 2, data);
  2306. }
  2307. if (pfx & 4) {
  2308. btrie_oct_t prefix0 = 0x80 >> TBM_STRIDE;
  2309. init_terminal_node(btrie,
  2310. tbm_insert_ext_path(btrie, &child.tbm_node, 2 * pfx + 1),
  2311. TBM_STRIDE + 1,
  2312. &prefix0, TBM_STRIDE + 3, data);
  2313. }
  2314. }
  2315. init_tbm_node(btrie, &node, 0, 0, NULL, left, right);
  2316. for (pfx = 0; pfx < 8; pfx++) {
  2317. unsigned base = lr ? (1U << (TBM_STRIDE - 1)) : 0;
  2318. node_t *ext_path = tbm_ext_path(&node.tbm_node, base + pfx);
  2319. if (pfx == 0)
  2320. assert(ext_path == NULL);
  2321. else if (pfx == 1)
  2322. check_terminal_lc_node(&ext_path->lc_node, 0, data);
  2323. else if (pfx == 2) {
  2324. check_terminal_lc_node(&ext_path->lc_node, 2, data);
  2325. assert(ext_path->lc_node.prefix[0] == 0);
  2326. }
  2327. else if (pfx == 4) {
  2328. check_terminal_lc_node(&ext_path->lc_node, 3, data);
  2329. assert(ext_path->lc_node.prefix[0] == (0x80 >> TBM_STRIDE));
  2330. }
  2331. else {
  2332. tbm_bitmap_t int_bm = 0;
  2333. assert(is_tbm_node(ext_path));
  2334. if (pfx & 1) {
  2335. int_bm |= bit(base_index(0, 0));
  2336. assert(*tbm_data_p(&ext_path->tbm_node, 0, 0) == data);
  2337. }
  2338. if (pfx & 2) {
  2339. int_bm |= bit(base_index(0, 2));
  2340. assert(*tbm_data_p(&ext_path->tbm_node, 0, 2) == data);
  2341. }
  2342. if (pfx & 4) {
  2343. int_bm |= bit(base_index(4, 3));
  2344. assert(*tbm_data_p(&ext_path->tbm_node, 4, 3) == data);
  2345. }
  2346. assert(ext_path->tbm_node.int_bm == int_bm);
  2347. }
  2348. }
  2349. }
  2350. PASS("test_init_tbm_node");
  2351. }
  2352. static void
  2353. test_add_to_trie()
  2354. {
  2355. struct btrie *btrie = btrie_init(NULL);
  2356. const void *data = (void *) 0xdeadbeef;
  2357. enum btrie_result result;
  2358. unsigned pfx, plen;
  2359. node_t root;
  2360. /* test initial insertion */
  2361. init_empty_node(btrie, &root);
  2362. result = add_to_trie(btrie, &root, 0,
  2363. numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE, data);
  2364. assert(result == BTRIE_OKAY);
  2365. check_non_terminal_lc_node(&root.lc_node, 8 * LC_BYTES_PER_NODE);
  2366. check_terminal_lc_node(&root.lc_node.ptr.child->lc_node,
  2367. 8 * LC_BYTES_PER_NODE, data);
  2368. /* test can follow LC node to tail, and then detect duplicate prefix */
  2369. result = add_to_trie(btrie, &root, 0,
  2370. numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE, data);
  2371. assert(result == BTRIE_DUPLICATE_PREFIX);
  2372. /* test can insert new TBM node within existing LC node */
  2373. result = add_to_trie(btrie, &root, 0,
  2374. &numbered_bytes[1], 16, data);
  2375. assert(result == BTRIE_OKAY);
  2376. check_non_terminal_lc_node(&root.lc_node, 7);
  2377. assert(is_tbm_node(root.lc_node.ptr.child));
  2378. /* test can convert terminal LC node to TBM node */
  2379. init_terminal_node(btrie, &root, 0, numbered_bytes, 12, data);
  2380. result = add_to_trie(btrie, &root, 0, numbered_bytes, 24, data);
  2381. assert(result == BTRIE_OKAY);
  2382. check_non_terminal_lc_node(&root.lc_node, 12);
  2383. assert(is_tbm_node(root.lc_node.ptr.child));
  2384. /* test can insert internal prefix data in TBM node */
  2385. for (plen = 0; plen < TBM_STRIDE; plen++) {
  2386. for (pfx = 0; pfx < (1U << plen); pfx++) {
  2387. btrie_oct_t prefix0 = plen ? pfx << (8 - plen) : 0;
  2388. init_empty_node(btrie, &root);
  2389. init_terminal_node(btrie, tbm_insert_ext_path(btrie, &root.tbm_node, 0),
  2390. TBM_STRIDE,
  2391. numbered_bytes, 8, data);
  2392. result = add_to_trie(btrie, &root, 0, &prefix0, plen, data);
  2393. assert(result == BTRIE_OKAY);
  2394. assert(is_tbm_node(&root));
  2395. assert(root.tbm_node.ext_bm == bit(0));
  2396. assert(root.tbm_node.int_bm == bit(base_index(pfx, plen)));
  2397. assert(*tbm_data_p(&root.tbm_node, pfx, plen) == data);
  2398. result = add_to_trie(btrie, &root, 0, &prefix0, plen, data);
  2399. assert(result == BTRIE_DUPLICATE_PREFIX);
  2400. }
  2401. }
  2402. /* test can add extending paths to TBM node */
  2403. for (pfx = 0; pfx < (1U << TBM_STRIDE); pfx++) {
  2404. btrie_oct_t prefix0 = pfx << (8 - TBM_STRIDE);
  2405. init_empty_node(btrie, &root);
  2406. tbm_insert_data(btrie, &root.tbm_node, 0, 0, data);
  2407. result = add_to_trie(btrie, &root, 0, &prefix0, 8, data);
  2408. assert(result == BTRIE_OKAY);
  2409. assert(is_tbm_node(&root));
  2410. assert(root.tbm_node.ext_bm == bit(pfx));
  2411. assert(root.tbm_node.int_bm == bit(base_index(0, 0)));
  2412. check_terminal_lc_node(&tbm_ext_path(&root.tbm_node, pfx)->lc_node,
  2413. 8 - TBM_STRIDE, data);
  2414. result = add_to_trie(btrie, &root, 0, &prefix0, 8, data);
  2415. assert(result == BTRIE_DUPLICATE_PREFIX);
  2416. }
  2417. /* test can follow extending path */
  2418. init_empty_node(btrie, &root);
  2419. init_terminal_node(btrie,
  2420. tbm_insert_ext_path(btrie, &root.tbm_node, 0), TBM_STRIDE,
  2421. numbered_bytes, 8, data);
  2422. result = add_to_trie(btrie, &root, 0, numbered_bytes, 7, data);
  2423. assert(result == BTRIE_OKAY);
  2424. assert(root.tbm_node.ext_bm == bit(0));
  2425. assert(root.tbm_node.int_bm == 0);
  2426. check_non_terminal_lc_node(&root.tbm_node.ptr.children[0].lc_node,
  2427. 7 - TBM_STRIDE);
  2428. PASS("test_add_to_trie");
  2429. }
  2430. static void
  2431. test_search_trie()
  2432. {
  2433. struct btrie *btrie = btrie_init(NULL);
  2434. const void *data01 = (void *) 0xdead0001;
  2435. const void *data11 = (void *) 0xdead0101;
  2436. const void *data = (void *) 0xdeadbeef;
  2437. unsigned plen, pfx;
  2438. node_t root;
  2439. /* test can follow chain of LC nodes to an exact match */
  2440. init_empty_node(btrie, &root);
  2441. add_to_trie(btrie, &root, 0,
  2442. numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE, data);
  2443. assert(search_trie(&root, 0, numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE) == data);
  2444. assert(search_trie(&root, 0, numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE + 1) == data);
  2445. assert(search_trie(&root, 0, numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE - 1) == NULL);
  2446. assert(search_trie(&root, 0, &numbered_bytes[1], 8 * 2 * LC_BYTES_PER_NODE) == NULL);
  2447. /* test can follow extending path to an exact match */
  2448. for (pfx = 0; pfx < (1U << TBM_STRIDE); pfx++) {
  2449. btrie_oct_t prefix0 = pfx << (8 - TBM_STRIDE);
  2450. init_empty_node(btrie, &root);
  2451. tbm_insert_data(btrie, &root.tbm_node, 0, 1, data01);
  2452. tbm_insert_data(btrie, &root.tbm_node, 1, 1, data11);
  2453. add_to_trie(btrie, &root, 0, &prefix0, 8, data);
  2454. assert(search_trie(&root, 0, &prefix0, 8) == data);
  2455. /* test that last matching TBM internal prefix gets picked up */
  2456. if (prefix0 & 0x80)
  2457. assert(search_trie(&root, 0, &prefix0, 7) == data11);
  2458. else
  2459. assert(search_trie(&root, 0, &prefix0, 7) == data01);
  2460. prefix0 ^= 1 << (8 - TBM_STRIDE);
  2461. if (prefix0 & 0x80)
  2462. assert(search_trie(&root, 0, &prefix0, 8) == data11);
  2463. else
  2464. assert(search_trie(&root, 0, &prefix0, 8) == data01);
  2465. }
  2466. /* test finding of TBM internal prefixes */
  2467. init_empty_node(btrie, &root);
  2468. tbm_insert_data(btrie, &root.tbm_node, 0, 1, data01);
  2469. tbm_insert_data(btrie, &root.tbm_node, 1, 1, data11);
  2470. assert(search_trie(&root, 0, numbered_bytes, 0) == NULL);
  2471. for (plen = 1; plen < TBM_STRIDE; plen++) {
  2472. for (pfx = 0; pfx < (1U << TBM_STRIDE); pfx++) {
  2473. btrie_oct_t prefix0 = pfx << (8 - plen);
  2474. if (prefix0 & 0x80)
  2475. assert(search_trie(&root, 0, &prefix0, plen) == data11);
  2476. else
  2477. assert(search_trie(&root, 0, &prefix0, plen) == data01);
  2478. }
  2479. }
  2480. PASS("test_search_trie");
  2481. }
  2482. static int
  2483. unit_tests()
  2484. {
  2485. test_struct_node_packing();
  2486. test_bit();
  2487. test_count_bits();
  2488. test_count_bits_before();
  2489. test_count_bits_from();
  2490. test_extract_bits();
  2491. test_high_bits();
  2492. test_prefixes_equal();
  2493. test_common_prefix();
  2494. test_base_index();
  2495. test_has_internal_data();
  2496. test_init_terminal_node();
  2497. test_coalesce_lc_node();
  2498. test_shorten_lc_node();
  2499. test_split_lc_node();
  2500. test_convert_lc_node_1();
  2501. test_convert_lc_node();
  2502. test_insert_lc_node();
  2503. test_next_pbyte();
  2504. test_init_tbm_node();
  2505. test_add_to_trie();
  2506. test_search_trie();
  2507. puts("\nOK");
  2508. return 0;
  2509. }
  2510. /*****************************************************************
  2511. *
  2512. * btrie_dump: print out the trie structure (for testing)
  2513. *
  2514. */
  2515. #define INDENT_FILL "....:....|....:....|....:....|....:....|"
  2516. static void dump_node(const node_t *node, unsigned pos, btrie_oct_t *prefix,
  2517. int indent);
  2518. static void
  2519. dump_prefix(btrie_oct_t *prefix, unsigned len, int indent, const char *tail)
  2520. {
  2521. unsigned i;
  2522. printf("%*.*s0x", indent, indent, INDENT_FILL);
  2523. for (i = 0; i < len / 8; i++)
  2524. printf("%02x", prefix[i]);
  2525. if (len % 8)
  2526. printf("%02x", prefix[len / 8] & high_bits(len % 8));
  2527. printf("/%u%s", len, tail);
  2528. }
  2529. /* the opposite of extract_bits, sets a short string of bits from integer */
  2530. static void
  2531. insert_bits(btrie_oct_t *prefix, unsigned pos, btrie_oct_t pfx, unsigned nbits)
  2532. {
  2533. if (nbits != 0) {
  2534. unsigned v = (prefix[pos / 8] << 8) + prefix[pos / 8 + 1];
  2535. unsigned mask = (1U << nbits) - 1;
  2536. unsigned shift = 16 - (pos % 8) - nbits;
  2537. v = (v & ~(mask << shift)) | (pfx << shift);
  2538. prefix[pos / 8] = v >> 8;
  2539. prefix[pos / 8 + 1] = (btrie_oct_t) v;
  2540. }
  2541. }
  2542. static void
  2543. dump_tbm_node(const struct tbm_node *node, unsigned pos,
  2544. btrie_oct_t *prefix, int indent)
  2545. {
  2546. unsigned pfx = 0, plen = 0;
  2547. dump_prefix(prefix, pos, indent, " [tbm]\n");
  2548. for (;;) {
  2549. if (plen < TBM_STRIDE) {
  2550. const void **data_p = tbm_data_p(node, pfx, plen);
  2551. if (data_p) {
  2552. insert_bits(prefix, pos, pfx, plen);
  2553. dump_prefix(prefix, pos + plen, indent, "");
  2554. printf(" [%u/%u] (%s)\n", pfx, plen, (const char *) *data_p);
  2555. }
  2556. plen++;
  2557. pfx <<= 1;
  2558. }
  2559. else {
  2560. const node_t *ext_path = tbm_ext_path(node, pfx);
  2561. if (ext_path) {
  2562. insert_bits(prefix, pos, pfx, TBM_STRIDE);
  2563. dump_node(ext_path, pos + TBM_STRIDE, prefix, indent + 1);
  2564. }
  2565. while (pfx & 1) {
  2566. if (--plen == 0)
  2567. return;
  2568. pfx >>= 1;
  2569. }
  2570. pfx++;
  2571. }
  2572. }
  2573. }
  2574. static void
  2575. dump_lc_node(const struct lc_node *node, unsigned pos,
  2576. btrie_oct_t *prefix, int indent)
  2577. {
  2578. unsigned end = pos + lc_len(node);
  2579. btrie_oct_t save_prefix = prefix[lc_shift(pos)];
  2580. memcpy(&prefix[lc_shift(pos)], node->prefix, lc_bytes(node, pos));
  2581. if (lc_is_terminal(node)) {
  2582. dump_prefix(prefix, end, indent, "");
  2583. printf(" (%s)\n", (const char *) node->ptr.data);
  2584. }
  2585. else {
  2586. dump_prefix(prefix, end, indent, "\n");
  2587. dump_node(node->ptr.child, end, prefix, indent + 1);
  2588. }
  2589. prefix[lc_shift(pos)] = save_prefix;
  2590. if (lc_bytes(node, pos) > 1)
  2591. memset(&prefix[lc_shift(pos) + 1], 0, lc_bytes(node, pos) - 1);
  2592. }
  2593. static void
  2594. dump_node(const node_t *node, unsigned pos, btrie_oct_t *prefix, int indent)
  2595. {
  2596. if (is_lc_node(node))
  2597. dump_lc_node(&node->lc_node, pos, prefix, indent);
  2598. else
  2599. dump_tbm_node(&node->tbm_node, pos, prefix, indent);
  2600. }
  2601. static void
  2602. btrie_dump(struct btrie *btrie)
  2603. {
  2604. btrie_oct_t prefix[(BTRIE_MAX_PREFIX + 7) / 8];
  2605. memset(prefix, 0, sizeof(prefix));
  2606. dump_node(&btrie->root, 0, prefix, 0);
  2607. puts(btrie_stats(btrie));
  2608. }
  2609. /****************************************************************
  2610. *
  2611. * test program - just enough to construct a trie and preform a lookup
  2612. *
  2613. */
  2614. #include <arpa/inet.h>
  2615. static int
  2616. parse_prefix(const char *arg, btrie_oct_t prefix[16], unsigned *len)
  2617. {
  2618. char addrbuf[128];
  2619. return sscanf(arg, "%127[0-9a-fA-F:]/%u", addrbuf, len) == 2 && inet_pton(AF_INET6, addrbuf, prefix) == 1;
  2620. }
  2621. static int
  2622. test_btrie(int argc, char *argv[])
  2623. {
  2624. struct btrie *btrie = btrie_init(NULL);
  2625. int i;
  2626. btrie_oct_t prefix[16];
  2627. unsigned len;
  2628. for (i = 1; i < argc - 1; i++) {
  2629. if (!parse_prefix(argv[i], prefix, &len)) {
  2630. fprintf(stderr, "Can not parse arg '%s'\n", argv[i]);
  2631. return 1;
  2632. }
  2633. btrie_add_prefix(btrie, prefix, len, argv[i]);
  2634. }
  2635. btrie_dump(btrie);
  2636. if (argc > 1) {
  2637. const void *data;
  2638. if (!parse_prefix(argv[argc - 1], prefix, &len)) {
  2639. fprintf(stderr, "Can not parse arg '%s'\n", argv[argc - 1]);
  2640. return 1;
  2641. }
  2642. data = btrie_lookup(btrie, prefix, 128);
  2643. printf("lookup(%s) => %s\n", argv[argc - 1], (const char *) data);
  2644. }
  2645. return 0;
  2646. }
  2647. int main(int argc, char *argv[])
  2648. {
  2649. if ((pgm_name = strrchr(argv[0], '/')) != NULL)
  2650. pgm_name++;
  2651. else
  2652. pgm_name = argv[0];
  2653. if (argc > 1)
  2654. return test_btrie(argc, argv);
  2655. else
  2656. return unit_tests();
  2657. }
  2658. #endif /* TEST */