You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

acism.c 3.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. /*
  2. ** Copyright (C) 2009-2014 Mischa Sandberg <mischasan@gmail.com>
  3. **
  4. ** This program is free software; you can redistribute it and/or modify
  5. ** it under the terms of the GNU Lesser General Public License Version as
  6. ** published by the Free Software Foundation. You may not use, modify or
  7. ** distribute this program under any other version of the GNU Lesser General
  8. ** Public License.
  9. **
  10. ** This program is distributed in the hope that it will be useful,
  11. ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ** GNU Lesser General Public License for more details.
  14. **
  15. ** You should have received a copy of the GNU Lesser General Public License
  16. ** along with this program; if not, write to the Free Software
  17. ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. #include <glib.h>
  20. #include "_acism.h"
  21. #include "unix-std.h"
  22. #define BACK ((SYMBOL)0)  // symbol 0: looked up via p_tran() below to fetch a state's back-reference transition
  23. #define ROOT ((STATE) 0)  // state 0: the state the matcher resets to when no transition applies
  24. extern const guchar lc_map[256];  // defined elsewhere; byte-indexed table applied to input when `caseless` is set
  25. int
  26. acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
  27. ACISM_ACTION *cb, void *context, int *statep, bool caseless)
  28. {
  29. char const *cp = text, *endp = cp + len;
  30. uint8_t s;
  31. STATE state = *statep;
  32. int ret = 0;
  33. while (cp < endp) {
  34. s = caseless ? lc_map[(guint8)*cp++] : *cp++;
  35. _SYMBOL sym = psp->symv[s];
  36. if (!sym) {
  37. // Input byte is not in any pattern string.
  38. state = ROOT;
  39. continue;
  40. }
  41. // Search for a valid transition from this (state, sym),
  42. // following the backref chain.
  43. TRAN next;
  44. while (!t_valid(psp, next = p_tran(psp, state, sym)) && state != ROOT) {
  45. TRAN back = p_tran(psp, state, BACK);
  46. state = t_valid(psp, back) ? t_next(psp, back) : ROOT;
  47. }
  48. if (!t_valid(psp, next))
  49. continue;
  50. if (!(next & (IS_MATCH | IS_SUFFIX))) {
  51. // No complete match yet; keep going.
  52. state = t_next(psp, next);
  53. continue;
  54. }
  55. // At this point, one or more patterns have matched.
  56. // Find all matches by following the backref chain.
  57. // A valid node for (sym) with no SUFFIX flag marks the
  58. // end of the suffix chain.
  59. // In the same backref traversal, find a new (state),
  60. // if the original transition is to a leaf.
  61. STATE s = state;
  62. // Initially state is ROOT. The chain search saves the
  63. // first state from which the next char has a transition.
  64. state = t_isleaf(psp, next) ? 0 : t_next(psp, next);
  65. while (1) {
  66. if (t_valid(psp, next)) {
  67. if (next & IS_MATCH) {
  68. unsigned strno, ss = s + sym, i;
  69. if (t_isleaf(psp, psp->tranv[ss])) {
  70. strno = t_strno(psp, psp->tranv[ss]);
  71. } else {
  72. for (i = p_hash(psp, ss); psp->hashv[i].state != ss; ++i);
  73. strno = psp->hashv[i].strno;
  74. }
  75. if ((ret = cb(strno, cp - text, context)))
  76. goto EXIT;
  77. }
  78. if (!state && !t_isleaf(psp, next))
  79. state = t_next(psp, next);
  80. if ( state && !(next & IS_SUFFIX))
  81. break;
  82. }
  83. if (s == ROOT)
  84. break;
  85. TRAN b = p_tran(psp, s, BACK);
  86. s = t_valid(psp, b) ? t_next(psp, b) : ROOT;
  87. next = p_tran(psp, s, sym);
  88. }
  89. }
  90. EXIT:
  91. *statep = state;
  92. return ret;
  93. }
  94. void
  95. acism_destroy(ac_trie_t *psp)
  96. {
  97. if (!psp) return;
  98. if (psp->flags & IS_MMAP)
  99. munmap((char*)psp->tranv - sizeof(ac_trie_t),
  100. sizeof(ac_trie_t) + p_size(psp));
  101. else g_free(psp->tranv);
  102. g_free(psp);
  103. }
  104. //EOF