diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-10-06 15:24:32 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-10-06 15:24:32 +0100 |
commit | c7f92da47dcb17393132b713f75a4dbd41629819 (patch) | |
tree | e8ad8656658ecb9ce99b1ddaad11b99cef56ee22 /src | |
parent | 763ab77075df986497e7b6937d33d5f487ba7fc6 (diff) | |
download | rspamd-c7f92da47dcb17393132b713f75a4dbd41629819.tar.gz rspamd-c7f92da47dcb17393132b713f75a4dbd41629819.zip |
Start new fixed strings library.
Diffstat (limited to 'src')
-rw-r--r-- | src/libutil/fstring.c | 407 | ||||
-rw-r--r-- | src/libutil/fstring.h | 144 | ||||
-rw-r--r-- | src/libutil/str_util.c | 5 |
3 files changed, 135 insertions, 421 deletions
diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c index 991cd3000..9e96aac3c 100644 --- a/src/libutil/fstring.c +++ b/src/libutil/fstring.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2012, Vsevolod Stakhov + * Copyright (c) 2009-2015, Vsevolod Stakhov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,316 +24,118 @@ #include "fstring.h" -/* - * Search first occurence of character in string - */ -ssize_t -rspamd_fstrchr (rspamd_fstring_t * src, gchar c) -{ - register size_t cur = 0; - - while (cur < src->len) { - if (*(src->begin + cur) == c) { - return cur; - } - cur++; - } +static const gsize default_initial_size = 48; +/* Maximum size when we double the size of new string */ +static const gsize max_grow = 1024 * 1024; - return -1; -} +#define fstravail(s) ((s)->allocated - (s)->len) +static rspamd_fstring_t * rspamd_fstring_grow (rspamd_fstring_t *str, + gsize needed_len) G_GNUC_WARN_UNUSED_RESULT; -/* - * Search last occurence of character in string - */ -ssize_t -rspamd_fstrrchr (rspamd_fstring_t * src, gchar c) -{ - register ssize_t cur = src->len; - - while (cur > 0) { - if (*(src->begin + cur) == c) { - return cur; - } - cur--; - } - - return -1; -} - -/* - * Search for pattern in orig - */ -ssize_t -rspamd_fstrstr (rspamd_fstring_t * orig, rspamd_fstring_t * pattern) +rspamd_fstring_t * +rspamd_fstring_new (void) { - register size_t cur = 0, pcur = 0; + rspamd_fstring_t *s; - if (pattern->len > orig->len) { - return -1; - } - - while (cur < orig->len) { - if (*(orig->begin + cur) == *pattern->begin) { - pcur = 0; - while (cur < orig->len && pcur < pattern->len) { - if (*(orig->begin + cur) != *(pattern->begin + pcur)) { - pcur = 0; - break; - } - cur++; - pcur++; - } - return cur - pattern->len; - } - cur++; - } - - return -1; + g_assert (posix_memalign ((void**)&s, 16, default_initial_size + sizeof (*s)) == 0); + s->len = 0; + s->allocated = default_initial_size; + return s; } -/* - * Search for pattern in orig ignoring case - */ -ssize_t -rspamd_fstrstri (rspamd_fstring_t * orig, rspamd_fstring_t * pattern) -{ - register size_t cur = 0, pcur = 0; - - if (pattern->len > orig->len) { - return -1; - } - - while (cur < orig->len) { - if (g_ascii_tolower (*(orig->begin + cur)) == - g_ascii_tolower (*pattern->begin)) { - pcur = 0; - while (cur < orig->len && pcur < pattern->len) { - if (g_ascii_tolower (*(orig->begin + cur)) != - g_ascii_tolower (*(pattern->begin + pcur))) { - pcur = 0; - break; - } - cur++; - pcur++; - } - return cur - pattern->len; - } - cur++; - } - - return -1; - -} - -/* - * Split string by tokens - * word contains parsed word - * - * Return: -1 - no new words can be extracted - * 1 - word was extracted and there are more words - * 0 - last word extracted - */ -gint -rspamd_fstrtok (rspamd_fstring_t * text, const gchar *sep, rspamd_fstring_token_t * state) +rspamd_fstring_t * +rspamd_fstring_sized_new (gsize initial_size) { - register size_t cur; - const gchar *csep = sep; - - if (state->pos >= text->len) { - return -1; - } + rspamd_fstring_t *s; + gsize real_size = MAX(default_initial_size, initial_size); - cur = state->pos; + g_assert (posix_memalign ((void **)&s, 16, real_size + sizeof (*s)) == 0); + s->len = 0; + s->allocated = real_size; - while (cur < text->len) { - while (*csep) { - if (*(text->begin + cur) == *csep) { - state->word.begin = (text->begin + state->pos); - state->word.len = cur - state->pos; - state->pos = cur + 1; - return 1; - } - csep++; - } - csep = sep; - cur++; - } - - /* Last word */ - state->word.begin = (text->begin + state->pos); - state->word.len = cur - state->pos; - state->pos = cur; - - return 0; + return s; } -/* - * Copy one string into other - */ -size_t -rspamd_fstrcpy (rspamd_fstring_t * dest, rspamd_fstring_t * src) +void +rspamd_fstring_free (rspamd_fstring_t *str) { - register size_t cur = 0; - - if (dest->size < src->len) { - return 0; - } - - while (cur < src->len && cur < dest->size) { - *(dest->begin + cur) = *(src->begin + cur); - cur++; - } - - return cur; + free (str); } -/* - * Concatenate two strings - */ -size_t -rspamd_fstrcat (rspamd_fstring_t * dest, rspamd_fstring_t * src) +static rspamd_fstring_t * +rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len) { - register size_t cur = 0; - gchar *p = dest->begin + dest->len; + gsize newlen; + gpointer nptr; - if (dest->size < src->len + dest->len) { - return 0; - } + newlen = str->len + needed_len; - while (cur < src->len) { - *p = *(src->begin + cur); - p++; - cur++; + /* + * Stop exponential grow at some point, since it might be slow for the + * vast majority of cases + */ + if (newlen < max_grow) { + newlen *= 2; } - - dest->len += src->len; - - return cur; - -} - -/* - * Make copy of string to 0-terminated string - */ -gchar * -rspamd_fstr_c_str (rspamd_fstring_t * str, rspamd_mempool_t * pool) -{ - gchar *res; - res = rspamd_mempool_alloc (pool, str->len + 1); - - /* Do not allow multiply \0 characters */ - memccpy (res, str->begin, '\0', str->len); - res[str->len] = 0; - - return res; -} - -/* - * Push one character to fstr - */ -gint -rspamd_fstrappend_c (rspamd_fstring_t * dest, gchar c) -{ - if (dest->size < dest->len) { - /* Need to reallocate string */ - return 0; + else { + newlen += max_grow; } - *(dest->begin + dest->len) = c; - dest->len++; - return 1; -} + nptr = realloc (str, newlen + sizeof (*str)); -/* - * Push one character to fstr - */ -gint -rspamd_fstrappend_u (rspamd_fstring_t * dest, gunichar c) -{ - int l; - if (dest->size < dest->len) { - /* Need to reallocate string */ - return 0; + if (nptr == NULL) { + /* Avoid memory leak */ + free (str); + g_assert (nptr); } - l = g_unichar_to_utf8 (c, dest->begin + dest->len); - dest->len += l; - return l; -} - -/* - * Allocate memory for f_str_t - */ -rspamd_fstring_t * -rspamd_fstralloc (rspamd_mempool_t * pool, size_t len) -{ - rspamd_fstring_t *res = rspamd_mempool_alloc (pool, sizeof (rspamd_fstring_t)); + str = nptr; + str->allocated = newlen; - res->begin = rspamd_mempool_alloc (pool, len); - - res->size = len; - res->len = 0; - return res; -} - -/* - * Allocate memory for f_str_t from temporary pool - */ -rspamd_fstring_t * -rspamd_fstralloc_tmp (rspamd_mempool_t * pool, size_t len) -{ - rspamd_fstring_t *res = rspamd_mempool_alloc_tmp (pool, sizeof (rspamd_fstring_t)); - - res->begin = rspamd_mempool_alloc_tmp (pool, len); - - res->size = len; - res->len = 0; - return res; + return str; } -/* - * Truncate string to its len - */ rspamd_fstring_t * -rspamd_fstrtruncate (rspamd_mempool_t * pool, rspamd_fstring_t * orig) +rspamd_fstring_append (rspamd_fstring_t *str, const char *in, gsize len) { - rspamd_fstring_t *res; + gsize avail = fstravail (str); - if (orig == NULL || orig->len == 0 || orig->size <= orig->len) { - return orig; + if (avail < len) { + str = rspamd_fstring_grow (str, len); } - res = rspamd_fstralloc (pool, orig->len); - if (res == NULL) { - return NULL; - } - rspamd_fstrcpy (res, orig); + memcpy (str->str + str->len, in, len); + str->len += len; - return res; + return str; } -/* - * Enlarge string to new size - */ -rspamd_fstring_t * -rspamd_fstrgrow (rspamd_mempool_t * pool, rspamd_fstring_t * orig, size_t newlen) +void +rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len) { - rspamd_fstring_t *res; + if (pos < str->len) { + if (pos + len > str->len) { + len = str->len - pos; + } - if (orig == NULL || orig->len == 0 || orig->size >= newlen) { - return orig; + if (len == str->len - pos) { + /* Fast path */ + str->len = pos; + } + else { + memmove (str->str + pos, str->str + pos + len, str->len - pos); + str->len -= pos; + } } - - res = rspamd_fstralloc (pool, newlen); - if (res == NULL) { - return NULL; + else { + /* Do nothing */ } - rspamd_fstrcpy (res, orig); - - return res; } +char *rspamd_fstring_cstr (const rspamd_fstring_t *str); + +/* Compat code */ static guint32 fstrhash_c (gchar c, guint32 hval) { @@ -362,33 +164,12 @@ fstrhash_c (gchar c, guint32 hval) return (hval << 3) + (hval >> 29); } -/* - * Return hash value for a string - */ -guint32 -rspamd_fstrhash (rspamd_fstring_t * str) -{ - size_t i; - guint32 hval; - gchar *c; - - if (str == NULL) { - return 0; - } - c = str->begin; - hval = str->len; - - for (i = 0; i < str->len; i++, c++) { - hval = fstrhash_c (*c, hval); - } - return hval; -} /* * Return hash value for a string */ guint32 -rspamd_fstrhash_lc (rspamd_fstring_t * str, gboolean is_utf) +rspamd_fstrhash_lc (const rspamd_fstring_t * str, gboolean is_utf) { gsize i; guint32 j, hval; @@ -400,11 +181,11 @@ rspamd_fstrhash_lc (rspamd_fstring_t * str, gboolean is_utf) return 0; } - p = str->begin; + p = str->str; hval = str->len; if (is_utf) { - while (end < str->begin + str->len) { + while (end < str->str + str->len) { if (!g_utf8_validate (p, str->len, &end)) { return rspamd_fstrhash_lc (str, FALSE); } @@ -431,42 +212,6 @@ rspamd_fstrhash_lc (rspamd_fstring_t * str, gboolean is_utf) return hval; } -void -rspamd_fstrstrip (rspamd_fstring_t * str) -{ - gchar *p = str->begin; - guint r = 0; - - while (r < str->len) { - if (g_ascii_isspace (*p)) { - p++; - r++; - } - else { - break; - } - } - - if (r > 0) { - memmove (str->begin, p, str->len - r); - str->len -= r; - } - - r = str->len; - p = str->begin + str->len; - while (r > 0) { - if (g_ascii_isspace (*p)) { - p--; - r--; - } - else { - break; - } - } - - str->len = r; -} - gboolean rspamd_fstring_equal (const rspamd_fstring_t *s1, const rspamd_fstring_t *s2) @@ -474,7 +219,7 @@ rspamd_fstring_equal (const rspamd_fstring_t *s1, g_assert (s1 != NULL && s2 != NULL); if (s1->len == s2->len) { - return (memcmp (s1->begin, s2->begin, s1->len) == 0); + return (memcmp (s1->str, s2->str, s1->len) == 0); } return FALSE; diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h index 27482877c..9fdf47211 100644 --- a/src/libutil/fstring.h +++ b/src/libutil/fstring.h @@ -1,123 +1,91 @@ /* - * Functions for handling with fixed size strings + * Copyright (c) 2009-2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #ifndef FSTRING_H #define FSTRING_H #include "config.h" #include "mem_pool.h" -#define update_buf_size(x) (x)->free = (x)->buf->size - \ - ((x)->pos - (x)->buf->begin); (x)->buf->len = (x)->pos - (x)->buf->begin +/** + * Fixed strings library + * These strings are NOT null-terminated for speed + */ typedef struct f_str_s { - gchar *begin; - size_t len; - size_t size; + gsize len; + gsize allocated; + gchar str[]; } rspamd_fstring_t; -typedef struct f_str_buf_s { - rspamd_fstring_t *buf; - gchar *pos; - size_t free; -} rspamd_fstring_buf_t; - -typedef struct f_tok_s { - rspamd_fstring_t word; - size_t pos; -} rspamd_fstring_token_t; - -/* - * Search first occurence of character in string - */ -ssize_t rspamd_fstrchr (rspamd_fstring_t *src, gchar c); - -/* - * Search last occurence of character in string - */ -ssize_t rspamd_fstrrchr (rspamd_fstring_t *src, gchar c); - -/* - * Search for pattern in orig - */ -ssize_t rspamd_fstrstr (rspamd_fstring_t *orig, rspamd_fstring_t *pattern); +typedef struct f_str_tok { + gsize len; + const gchar *begin; +} rspamd_ftok_t; -/* - * Search for pattern in orig ignoring case +/** + * Create new fixed length string */ -ssize_t rspamd_fstrstri (rspamd_fstring_t *orig, rspamd_fstring_t *pattern); +rspamd_fstring_t* rspamd_fstring_new (void); -/* - * Split string by tokens - * word contains parsed word +/** + * Create new fixed length string with preallocated size */ -gint rspamd_fstrtok (rspamd_fstring_t *text, const gchar *sep, rspamd_fstring_token_t *state); +rspamd_fstring_t *rspamd_fstring_sized_new (gsize initial_size); -/* - * Copy one string into other +/** + * Free fixed length string */ -size_t rspamd_fstrcpy (rspamd_fstring_t *dest, rspamd_fstring_t *src); +void rspamd_fstring_free (rspamd_fstring_t *str); -/* - * Concatenate two strings +/** + * Append data to a fixed length string */ -size_t rspamd_fstrcat (rspamd_fstring_t *dest, rspamd_fstring_t *src); +rspamd_fstring_t* rspamd_fstring_append (rspamd_fstring_t *str, + const char *in, gsize len) G_GNUC_WARN_UNUSED_RESULT; -/* - * Push one character to fstr - */ -gint rspamd_fstrappend_c (rspamd_fstring_t *dest, gchar c); -/* - * Push one character to fstr +/** + * Erase `len` characters at postion `pos` */ -gint rspamd_fstrappend_u (rspamd_fstring_t *dest, gunichar c); +void rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len); -/* - * Allocate memory for f_str_t +/** + * Convert fixed string to a zero terminated string. This string should be + * freed by a caller */ -rspamd_fstring_t * rspamd_fstralloc (rspamd_mempool_t *pool, size_t len); - -/* - * Allocate memory for f_str_t from temporary pool - */ -rspamd_fstring_t * rspamd_fstralloc_tmp (rspamd_mempool_t *pool, size_t len); - -/* - * Truncate string to its len - */ -rspamd_fstring_t * rspamd_fstrtruncate (rspamd_mempool_t *pool, rspamd_fstring_t *orig); - -/* - * Enlarge string to new size - */ -rspamd_fstring_t * rspamd_fstrgrow (rspamd_mempool_t *pool, rspamd_fstring_t *orig, size_t newlen); - -/* - * Return specified character - */ -#define fstridx(str, pos) *((str)->begin + (pos)) - -/* - * Return fast hash value for fixed string - */ -guint32 rspamd_fstrhash (rspamd_fstring_t *str); +char * rspamd_fstring_cstr (const rspamd_fstring_t *str); /* * Return fast hash value for fixed string converted to lowercase */ -guint32 rspamd_fstrhash_lc (rspamd_fstring_t *str, gboolean is_utf); -/* - * Make copy of string to 0-terminated string - */ -gchar * rspamd_fstr_c_str (rspamd_fstring_t *str, rspamd_mempool_t *pool); +guint32 rspamd_fstrhash_lc (const rspamd_fstring_t *str, gboolean is_utf); -/* - * Strip fstr string from space symbols +/** + * Return true if two strings are equal */ -void rspamd_fstrstrip (rspamd_fstring_t *str); - gboolean rspamd_fstring_equal (const rspamd_fstring_t *s1, const rspamd_fstring_t *s2); diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 6465208f9..6de4ef904 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -218,8 +218,9 @@ gboolean rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2) { const rspamd_fstring_t *f1 = v, *f2 = v2; + if (f1->len == f2->len && - g_ascii_strncasecmp (f1->begin, f2->begin, f1->len) == 0) { + g_ascii_strncasecmp (f1->str, f2->str, f1->len) == 0) { return TRUE; } @@ -232,7 +233,7 @@ rspamd_fstring_icase_hash (gconstpointer key) { const rspamd_fstring_t *f = key; - return rspamd_icase_hash (f->begin, f->len); + return rspamd_icase_hash (f->str, f->len); } gboolean |