aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/aho-corasick
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/aho-corasick')
-rw-r--r--contrib/aho-corasick/acism.c191
1 files changed, 104 insertions, 87 deletions
diff --git a/contrib/aho-corasick/acism.c b/contrib/aho-corasick/acism.c
index e2b48a590..b0cee0d66 100644
--- a/contrib/aho-corasick/acism.c
+++ b/contrib/aho-corasick/acism.c
@@ -1,4 +1,20 @@
/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
** Copyright (C) 2009-2014 Mischa Sandberg <mischasan@gmail.com>
**
** This program is free software; you can redistribute it and/or modify
@@ -22,103 +38,104 @@
#include "_acism.h"
#include "unix-std.h"
-#define BACK ((SYMBOL)0)
+#define BACK ((SYMBOL) 0)
#define ROOT ((STATE) 0)
-extern const guchar lc_map[256];
+extern const unsigned char lc_map[256];
-int
-acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
- ACISM_ACTION *cb, void *context, int *statep, bool caseless)
+int acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
+ ACISM_ACTION *cb, void *context, int *statep, bool caseless)
{
- char const *cp = text, *endp = cp + len;
- uint8_t s;
- STATE state = *statep;
- int ret = 0;
-
- while (cp < endp) {
- s = caseless ? lc_map[(guint8)*cp++] : *cp++;
- _SYMBOL sym = psp->symv[s];
- if (!sym) {
- // Input byte is not in any pattern string.
- state = ROOT;
- continue;
- }
-
- // Search for a valid transition from this (state, sym),
- // following the backref chain.
-
- TRAN next;
- while (!t_valid(psp, next = p_tran(psp, state, sym)) && state != ROOT) {
- TRAN back = p_tran(psp, state, BACK);
- state = t_valid(psp, back) ? t_next(psp, back) : ROOT;
- }
-
- if (!t_valid(psp, next))
- continue;
-
- if (!(next & (IS_MATCH | IS_SUFFIX))) {
- // No complete match yet; keep going.
- state = t_next(psp, next);
- continue;
- }
-
- // At this point, one or more patterns have matched.
- // Find all matches by following the backref chain.
- // A valid node for (sym) with no SUFFIX flag marks the
- // end of the suffix chain.
- // In the same backref traversal, find a new (state),
- // if the original transition is to a leaf.
-
- STATE s = state;
-
- // Initially state is ROOT. The chain search saves the
- // first state from which the next char has a transition.
- state = t_isleaf(psp, next) ? 0 : t_next(psp, next);
-
- while (1) {
-
- if (t_valid(psp, next)) {
-
- if (next & IS_MATCH) {
- unsigned strno, ss = s + sym, i;
- if (t_isleaf(psp, psp->tranv[ss])) {
- strno = t_strno(psp, psp->tranv[ss]);
- } else {
- for (i = p_hash(psp, ss); psp->hashv[i].state != ss; ++i);
- strno = psp->hashv[i].strno;
- }
-
- if ((ret = cb(strno, cp - text, context)))
- goto EXIT;
- }
-
- if (!state && !t_isleaf(psp, next))
- state = t_next(psp, next);
- if ( state && !(next & IS_SUFFIX))
- break;
- }
-
- if (s == ROOT)
- break;
-
- TRAN b = p_tran(psp, s, BACK);
- s = t_valid(psp, b) ? t_next(psp, b) : ROOT;
- next = p_tran(psp, s, sym);
- }
- }
+ char const *cp = text, *endp = cp + len;
+ uint8_t s;
+ STATE state = *statep;
+ int ret = 0;
+
+ while (cp < endp) {
+ s = caseless ? lc_map[(uint8_t) *cp++] : *cp++;
+ _SYMBOL sym = psp->symv[s];
+ if (!sym) {
+ // Input byte is not in any pattern string.
+ state = ROOT;
+ continue;
+ }
+
+ // Search for a valid transition from this (state, sym),
+ // following the backref chain.
+
+ TRAN next;
+ while (!t_valid(psp, next = p_tran(psp, state, sym)) && state != ROOT) {
+ TRAN back = p_tran(psp, state, BACK);
+ state = t_valid(psp, back) ? t_next(psp, back) : ROOT;
+ }
+
+ if (!t_valid(psp, next))
+ continue;
+
+ if (!(next & (IS_MATCH | IS_SUFFIX))) {
+ // No complete match yet; keep going.
+ state = t_next(psp, next);
+ continue;
+ }
+
+ // At this point, one or more patterns have matched.
+ // Find all matches by following the backref chain.
+ // A valid node for (sym) with no SUFFIX flag marks the
+ // end of the suffix chain.
+ // In the same backref traversal, find a new (state),
+ // if the original transition is to a leaf.
+
+ STATE s = state;
+
+ // Initially state is ROOT. The chain search saves the
+ // first state from which the next char has a transition.
+ state = t_isleaf(psp, next) ? 0 : t_next(psp, next);
+
+ while (1) {
+
+ if (t_valid(psp, next)) {
+
+ if (next & IS_MATCH) {
+ unsigned strno, ss = s + sym, i;
+ if (t_isleaf(psp, psp->tranv[ss])) {
+ strno = t_strno(psp, psp->tranv[ss]);
+ }
+ else {
+ for (i = p_hash(psp, ss); psp->hashv[i].state != ss; ++i)
+ ;
+ strno = psp->hashv[i].strno;
+ }
+
+ if ((ret = cb(strno, cp - text, context)))
+ goto EXIT;
+ }
+
+ if (!state && !t_isleaf(psp, next))
+ state = t_next(psp, next);
+ if (state && !(next & IS_SUFFIX))
+ break;
+ }
+
+ if (s == ROOT)
+ break;
+
+ TRAN b = p_tran(psp, s, BACK);
+ s = t_valid(psp, b) ? t_next(psp, b) : ROOT;
+ next = p_tran(psp, s, sym);
+ }
+ }
EXIT:
*statep = state;
- return ret;
+ return ret;
}
-void
-acism_destroy(ac_trie_t *psp)
+void acism_destroy(ac_trie_t *psp)
{
if (!psp) return;
if (psp->flags & IS_MMAP)
- munmap((char*)psp->tranv - sizeof(ac_trie_t),
- sizeof(ac_trie_t) + p_size(psp));
- else g_free(psp->tranv);
+ munmap((char *) psp->tranv - sizeof(ac_trie_t),
+ sizeof(ac_trie_t) + p_size(psp));
+ else
+ g_free(psp->tranv);
g_free(psp);
}
//EOF