123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- /*
- * Copyright 2024 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /*
- ** Copyright (C) 2009-2014 Mischa Sandberg <mischasan@gmail.com>
- **
- ** This program is free software; you can redistribute it and/or modify
- ** it under the terms of the GNU Lesser General Public License Version as
- ** published by the Free Software Foundation. You may not use, modify or
- ** distribute this program under any other version of the GNU Lesser General
- ** Public License.
- **
- ** This program is distributed in the hope that it will be useful,
- ** but WITHOUT ANY WARRANTY; without even the implied warranty of
- ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ** GNU Lesser General Public License for more details.
- **
- ** You should have received a copy of the GNU Lesser General Public License
- ** along with this program; if not, write to the Free Software
- ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
- #include <glib.h>
-
- #include "_acism.h"
- #include "unix-std.h"
-
- #define BACK ((SYMBOL) 0)
- #define ROOT ((STATE) 0)
- extern const unsigned char lc_map[256];
-
- int acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
- ACISM_ACTION *cb, void *context, int *statep, bool caseless)
- {
- char const *cp = text, *endp = cp + len;
- uint8_t s;
- STATE state = *statep;
- int ret = 0;
-
- while (cp < endp) {
- s = caseless ? lc_map[(uint8_t) *cp++] : *cp++;
- _SYMBOL sym = psp->symv[s];
- if (!sym) {
- // Input byte is not in any pattern string.
- state = ROOT;
- continue;
- }
-
- // Search for a valid transition from this (state, sym),
- // following the backref chain.
-
- TRAN next;
- while (!t_valid(psp, next = p_tran(psp, state, sym)) && state != ROOT) {
- TRAN back = p_tran(psp, state, BACK);
- state = t_valid(psp, back) ? t_next(psp, back) : ROOT;
- }
-
- if (!t_valid(psp, next))
- continue;
-
- if (!(next & (IS_MATCH | IS_SUFFIX))) {
- // No complete match yet; keep going.
- state = t_next(psp, next);
- continue;
- }
-
- // At this point, one or more patterns have matched.
- // Find all matches by following the backref chain.
- // A valid node for (sym) with no SUFFIX flag marks the
- // end of the suffix chain.
- // In the same backref traversal, find a new (state),
- // if the original transition is to a leaf.
-
- STATE s = state;
-
- // Initially state is ROOT. The chain search saves the
- // first state from which the next char has a transition.
- state = t_isleaf(psp, next) ? 0 : t_next(psp, next);
-
- while (1) {
-
- if (t_valid(psp, next)) {
-
- if (next & IS_MATCH) {
- unsigned strno, ss = s + sym, i;
- if (t_isleaf(psp, psp->tranv[ss])) {
- strno = t_strno(psp, psp->tranv[ss]);
- }
- else {
- for (i = p_hash(psp, ss); psp->hashv[i].state != ss; ++i)
- ;
- strno = psp->hashv[i].strno;
- }
-
- if ((ret = cb(strno, cp - text, context)))
- goto EXIT;
- }
-
- if (!state && !t_isleaf(psp, next))
- state = t_next(psp, next);
- if (state && !(next & IS_SUFFIX))
- break;
- }
-
- if (s == ROOT)
- break;
-
- TRAN b = p_tran(psp, s, BACK);
- s = t_valid(psp, b) ? t_next(psp, b) : ROOT;
- next = p_tran(psp, s, sym);
- }
- }
- EXIT:
- *statep = state;
- return ret;
- }
-
- void acism_destroy(ac_trie_t *psp)
- {
- if (!psp) return;
- if (psp->flags & IS_MMAP)
- munmap((char *) psp->tranv - sizeof(ac_trie_t),
- sizeof(ac_trie_t) + p_size(psp));
- else
- g_free(psp->tranv);
- g_free(psp);
- }
- //EOF
|