You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

acism.c 3.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. ** Copyright (C) 2009-2014 Mischa Sandberg <mischasan@gmail.com>
  18. **
  19. ** This program is free software; you can redistribute it and/or modify
  20. ** it under the terms of the GNU Lesser General Public License Version as
  21. ** published by the Free Software Foundation. You may not use, modify or
  22. ** distribute this program under any other version of the GNU Lesser General
  23. ** Public License.
  24. **
  25. ** This program is distributed in the hope that it will be useful,
  26. ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  27. ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  28. ** GNU Lesser General Public License for more details.
  29. **
  30. ** You should have received a copy of the GNU Lesser General Public License
  31. ** along with this program; if not, write to the Free Software
  32. ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  33. */
  34. #include <glib.h>
  35. #include "_acism.h"
  36. #include "unix-std.h"
  37. #define BACK ((SYMBOL) 0)
  38. #define ROOT ((STATE) 0)
  39. extern const unsigned char lc_map[256];
  40. int acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
  41. ACISM_ACTION *cb, void *context, int *statep, bool caseless)
  42. {
  43. char const *cp = text, *endp = cp + len;
  44. uint8_t s;
  45. STATE state = *statep;
  46. int ret = 0;
  47. while (cp < endp) {
  48. s = caseless ? lc_map[(uint8_t) *cp++] : *cp++;
  49. _SYMBOL sym = psp->symv[s];
  50. if (!sym) {
  51. // Input byte is not in any pattern string.
  52. state = ROOT;
  53. continue;
  54. }
  55. // Search for a valid transition from this (state, sym),
  56. // following the backref chain.
  57. TRAN next;
  58. while (!t_valid(psp, next = p_tran(psp, state, sym)) && state != ROOT) {
  59. TRAN back = p_tran(psp, state, BACK);
  60. state = t_valid(psp, back) ? t_next(psp, back) : ROOT;
  61. }
  62. if (!t_valid(psp, next))
  63. continue;
  64. if (!(next & (IS_MATCH | IS_SUFFIX))) {
  65. // No complete match yet; keep going.
  66. state = t_next(psp, next);
  67. continue;
  68. }
  69. // At this point, one or more patterns have matched.
  70. // Find all matches by following the backref chain.
  71. // A valid node for (sym) with no SUFFIX flag marks the
  72. // end of the suffix chain.
  73. // In the same backref traversal, find a new (state),
  74. // if the original transition is to a leaf.
  75. STATE s = state;
  76. // Initially state is ROOT. The chain search saves the
  77. // first state from which the next char has a transition.
  78. state = t_isleaf(psp, next) ? 0 : t_next(psp, next);
  79. while (1) {
  80. if (t_valid(psp, next)) {
  81. if (next & IS_MATCH) {
  82. unsigned strno, ss = s + sym, i;
  83. if (t_isleaf(psp, psp->tranv[ss])) {
  84. strno = t_strno(psp, psp->tranv[ss]);
  85. }
  86. else {
  87. for (i = p_hash(psp, ss); psp->hashv[i].state != ss; ++i)
  88. ;
  89. strno = psp->hashv[i].strno;
  90. }
  91. if ((ret = cb(strno, cp - text, context)))
  92. goto EXIT;
  93. }
  94. if (!state && !t_isleaf(psp, next))
  95. state = t_next(psp, next);
  96. if (state && !(next & IS_SUFFIX))
  97. break;
  98. }
  99. if (s == ROOT)
  100. break;
  101. TRAN b = p_tran(psp, s, BACK);
  102. s = t_valid(psp, b) ? t_next(psp, b) : ROOT;
  103. next = p_tran(psp, s, sym);
  104. }
  105. }
  106. EXIT:
  107. *statep = state;
  108. return ret;
  109. }
  110. void acism_destroy(ac_trie_t *psp)
  111. {
  112. if (!psp) return;
  113. if (psp->flags & IS_MMAP)
  114. munmap((char *) psp->tranv - sizeof(ac_trie_t),
  115. sizeof(ac_trie_t) + p_size(psp));
  116. else
  117. g_free(psp->tranv);
  118. g_free(psp);
  119. }
  120. //EOF