You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

_acism.h 3.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. /*
  2. ** Copyright (C) 2009-2014 Mischa Sandberg <mischasan@gmail.com>
  3. **
  4. ** This program is free software; you can redistribute it and/or modify
  5. ** it under the terms of the GNU Lesser General Public License Version 3 as
  6. ** published by the Free Software Foundation. You may not use, modify or
  7. ** distribute this program under any other version of the GNU Lesser General
  8. ** Public License.
  9. **
  10. ** This program is distributed in the hope that it will be useful,
  11. ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ** GNU Lesser General Public License for more details.
  14. **
  15. ** You should have received a copy of the GNU Lesser General Public License
  16. ** along with this program; if not, write to the Free Software
  17. ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. #ifndef _ACISM_H
  20. #define _ACISM_H
  21. #include <stdint.h>
  22. #include <stdlib.h> // malloc
  23. #include <string.h> // memcpy
  24. typedef int (*qsort_cmp)(const void *, const void *);
  25. // "Width" specifier for different plats
  26. #if __LONG_MAX__ == 9223372036854775807LL
  27. # ifdef __APPLE__
  28. # define F64 "ll"
  29. # else
  30. # define F64 "l"
  31. # endif
  32. #elif __LONG_MAX__ == 2147483647L || defined(_LONG_LONG) || defined(__sun) // AIX 6.1 ...
  33. # define F64 "ll"
  34. #else
  35. //XXX Assuming F64 is "ll" for VS
  36. # define F64 "ll"
  37. #endif
  38. #ifndef ACISM_SIZE
  39. # define ACISM_SIZE 4
  40. #endif
  41. #if ACISM_SIZE == 8
  42. typedef uint64_t TRAN, STATE, STRNO;
  43. # define SYM_BITS 9U
  44. # define SYM_MASK 511
  45. # define FNO F64
  46. #else
  47. typedef uint32_t TRAN, STATE, STRNO;
  48. # define SYM_BITS psp->sym_bits
  49. # define SYM_MASK psp->sym_mask
  50. # define FNO
  51. #endif
  52. typedef uint16_t SYMBOL;
  53. typedef unsigned _SYMBOL; // An efficient stacklocal SYMBOL
  54. #define BACK ((SYMBOL)0)
  55. #define ROOT ((STATE) 0)
  56. // MATCH and SUFFIX are the top 2 bits of a TRAN:
  57. enum {
  58. IS_MATCH = (TRAN)1 << (8*sizeof(TRAN) - 1),
  59. IS_SUFFIX = (TRAN)1 << (8*sizeof(TRAN) - 2),
  60. T_FLAGS = IS_MATCH | IS_SUFFIX
  61. };
  62. typedef struct { STATE state; STRNO strno; } STRASH;
  63. struct acism {
  64. TRAN* tranv;
  65. STRASH* hashv;
  66. unsigned flags;
  67. # define IS_MMAP 1
  68. #if ACISM_SIZE < 8
  69. TRAN sym_mask;
  70. unsigned sym_bits;
  71. #endif
  72. unsigned hash_mod; // search hashv starting at (state + sym) % hash_mod.
  73. unsigned hash_size; // #(hashv): hash_mod plus the overflows past [hash_mod-1]
  74. unsigned tran_size; // #(tranv)
  75. unsigned nsyms, nchars, nstrs, maxlen;
  76. SYMBOL symv[256];
  77. };
  78. #include "acism.h"
  79. // p_size: size of tranv + hashv
  80. static inline size_t p_size(ACISM const *psp)
  81. { return psp->hash_size * sizeof*psp->hashv
  82. + psp->tran_size * sizeof*psp->tranv; }
  83. static inline unsigned p_hash(ACISM const *psp, STATE s)
  84. { return s * 107 % psp->hash_mod; }
  85. static inline void set_tranv(ACISM *psp, void *mem)
  86. { psp->hashv = (STRASH*)&(psp->tranv = (TRAN*)mem)[psp->tran_size]; }
  87. // TRAN accessors. For ACISM_SIZE=8, SYM_{BITS,MASK} do not use psp.
  88. static inline TRAN p_tran(ACISM const *psp, STATE s, _SYMBOL sym)
  89. { return psp->tranv[s + sym] ^ sym; }
  90. static inline _SYMBOL t_sym(ACISM const *psp, TRAN t) { (void)psp; return t & SYM_MASK; }
  91. static inline STATE t_next(ACISM const *psp, TRAN t) { (void)psp; return (t & ~T_FLAGS) >> SYM_BITS; }
  92. static inline int t_isleaf(ACISM const *psp, TRAN t) { return t_next(psp, t) >= psp->tran_size; }
  93. static inline int t_strno(ACISM const *psp, TRAN t) { return t_next(psp, t) - psp->tran_size; }
  94. static inline _SYMBOL t_valid(ACISM const *psp, TRAN t) { return !t_sym(psp, t); }
  95. #endif//_ACISM_H