/*
- * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * Copyright (c) 2009-2015, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
#include "fstring.h"
-/*
- * Search first occurence of character in string
- */
-ssize_t
-rspamd_fstrchr (rspamd_fstring_t * src, gchar c)
-{
- register size_t cur = 0;
-
- while (cur < src->len) {
- if (*(src->begin + cur) == c) {
- return cur;
- }
- cur++;
- }
+static const gsize default_initial_size = 48; /* Payload bytes reserved by rspamd_fstring_new(); also the lower bound used by rspamd_fstring_sized_new() */
+/* Growth threshold: a requested size below it is doubled, otherwise it is extended by max_grow bytes */
+static const gsize max_grow = 1024 * 1024;
- return -1;
-}
+#define fstravail(s) ((s)->allocated - (s)->len) /* Payload bytes of `s` that are allocated but not yet used */
+static rspamd_fstring_t * rspamd_fstring_grow (rspamd_fstring_t *str,
+ gsize needed_len) G_GNUC_WARN_UNUSED_RESULT; /* result must be used: the string may be reallocated */
-/*
- * Search last occurence of character in string
- */
-ssize_t
-rspamd_fstrrchr (rspamd_fstring_t * src, gchar c)
-{
- register ssize_t cur = src->len;
-
- while (cur > 0) {
- if (*(src->begin + cur) == c) {
- return cur;
- }
- cur--;
- }
-
- return -1;
-}
-
-/*
- * Search for pattern in orig
- */
-ssize_t
-rspamd_fstrstr (rspamd_fstring_t * orig, rspamd_fstring_t * pattern)
+/*
+ * Allocate an empty string whose payload can hold default_initial_size bytes.
+ * Header and payload live in a single 16-byte aligned allocation that is
+ * released with rspamd_fstring_free(). Never returns NULL: an allocation
+ * failure terminates the process.
+ */
+rspamd_fstring_t *
+rspamd_fstring_new (void)
{
- register size_t cur = 0, pcur = 0;
+	rspamd_fstring_t *s;
+	void *mem = NULL;
+	const gsize total = default_initial_size + sizeof (*s);
- if (pattern->len > orig->len) {
- return -1;
- }
-
- while (cur < orig->len) {
- if (*(orig->begin + cur) == *pattern->begin) {
- pcur = 0;
- while (cur < orig->len && pcur < pattern->len) {
- if (*(orig->begin + cur) != *(pattern->begin + pcur)) {
- pcur = 0;
- break;
- }
- cur++;
- pcur++;
- }
- return cur - pattern->len;
- }
- cur++;
- }
-
- return -1;
+
+	/*
+	 * The allocation must not be an argument of g_assert(): when assertions
+	 * are compiled out (G_DISABLE_ASSERT) the whole expression disappears and
+	 * the string would be used without ever being allocated.
+	 */
+	if (posix_memalign (&mem, 16, total) != 0) {
+		g_error ("%s: cannot allocate %" G_GSIZE_FORMAT " bytes",
+				G_STRLOC, total);
+	}
+
+	s = mem;
+	s->len = 0;
+	s->allocated = default_initial_size;
+	return s;
}
-/*
- * Search for pattern in orig ignoring case
- */
-ssize_t
-rspamd_fstrstri (rspamd_fstring_t * orig, rspamd_fstring_t * pattern)
-{
- register size_t cur = 0, pcur = 0;
-
- if (pattern->len > orig->len) {
- return -1;
- }
-
- while (cur < orig->len) {
- if (g_ascii_tolower (*(orig->begin + cur)) ==
- g_ascii_tolower (*pattern->begin)) {
- pcur = 0;
- while (cur < orig->len && pcur < pattern->len) {
- if (g_ascii_tolower (*(orig->begin + cur)) !=
- g_ascii_tolower (*(pattern->begin + pcur))) {
- pcur = 0;
- break;
- }
- cur++;
- pcur++;
- }
- return cur - pattern->len;
- }
- cur++;
- }
-
- return -1;
-
-}
-
-/*
- * Split string by tokens
- * word contains parsed word
- *
- * Return: -1 - no new words can be extracted
- * 1 - word was extracted and there are more words
- * 0 - last word extracted
- */
-gint
-rspamd_fstrtok (rspamd_fstring_t * text, const gchar *sep, rspamd_fstring_token_t * state)
+/*
+ * Allocate an empty string with room for at least `initial_size` payload
+ * bytes (never less than default_initial_size). Header and payload live in a
+ * single 16-byte aligned allocation that is released with
+ * rspamd_fstring_free(). Never returns NULL: an impossible size or an
+ * allocation failure terminates the process.
+ */
+rspamd_fstring_t *
+rspamd_fstring_sized_new (gsize initial_size)
{
- register size_t cur;
- const gchar *csep = sep;
-
- if (state->pos >= text->len) {
- return -1;
- }
+	rspamd_fstring_t *s;
+	void *mem = NULL;
+	gsize real_size = MAX (default_initial_size, initial_size);
- cur = state->pos;
+
+	/* Adding the header size below must not wrap around */
+	if (real_size > G_MAXSIZE - sizeof (*s)) {
+		g_error ("%s: requested size %" G_GSIZE_FORMAT " is too large",
+				G_STRLOC, real_size);
+	}
+
+	/*
+	 * Keep the allocation outside g_assert(): with G_DISABLE_ASSERT the
+	 * asserted expression is not evaluated at all.
+	 */
+	if (posix_memalign (&mem, 16, real_size + sizeof (*s)) != 0) {
+		g_error ("%s: cannot allocate %" G_GSIZE_FORMAT " bytes",
+				G_STRLOC, real_size + sizeof (*s));
+	}
+
+	s = mem;
+	s->len = 0;
+	s->allocated = real_size;
- while (cur < text->len) {
- while (*csep) {
- if (*(text->begin + cur) == *csep) {
- state->word.begin = (text->begin + state->pos);
- state->word.len = cur - state->pos;
- state->pos = cur + 1;
- return 1;
- }
- csep++;
- }
- csep = sep;
- cur++;
- }
-
- /* Last word */
- state->word.begin = (text->begin + state->pos);
- state->word.len = cur - state->pos;
- state->pos = cur;
-
- return 0;
+	return s;
}
-/*
- * Copy one string into other
- */
-size_t
-rspamd_fstrcpy (rspamd_fstring_t * dest, rspamd_fstring_t * src)
+void
+rspamd_fstring_free (rspamd_fstring_t *str)
{
- register size_t cur = 0;
-
- if (dest->size < src->len) {
- return 0;
- }
-
- while (cur < src->len && cur < dest->size) {
- *(dest->begin + cur) = *(src->begin + cur);
- cur++;
- }
-
- return cur;
+ free (str); /* header and payload share one allocation, so a single free() releases both */
}
-/*
- * Concatenate two strings
- */
-size_t
-rspamd_fstrcat (rspamd_fstring_t * dest, rspamd_fstring_t * src)
+/*
+ * Enlarge `str` so that at least `needed_len` more bytes fit after the bytes
+ * already in use. The block may move: the caller must drop the pointer it
+ * passed in and continue with the returned one. Never returns NULL; running
+ * out of memory terminates the process.
+ */
+static rspamd_fstring_t *
+rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len)
{
- register size_t cur = 0;
- gchar *p = dest->begin + dest->len;
+	gsize newlen;
+	gpointer nptr;
- if (dest->size < src->len + dest->len) {
- return 0;
- }
+	newlen = str->len + needed_len;
- while (cur < src->len) {
- *p = *(src->begin + cur);
- p++;
- cur++;
+	/*
+	 * Double the requested size while it is small; once it reaches max_grow
+	 * switch to fixed max_grow increments so large strings do not
+	 * over-allocate.
+	 */
+	if (newlen < max_grow) {
+		newlen *= 2;
}
-
- dest->len += src->len;
-
- return cur;
-
-}
-
-/*
- * Make copy of string to 0-terminated string
- */
-gchar *
-rspamd_fstr_c_str (rspamd_fstring_t * str, rspamd_mempool_t * pool)
-{
- gchar *res;
- res = rspamd_mempool_alloc (pool, str->len + 1);
-
- /* Do not allow multiply \0 characters */
- memccpy (res, str->begin, '\0', str->len);
- res[str->len] = 0;
-
- return res;
-}
-
-/*
- * Push one character to fstr
- */
-gint
-rspamd_fstrappend_c (rspamd_fstring_t * dest, gchar c)
-{
- if (dest->size < dest->len) {
- /* Need to reallocate string */
- return 0;
+	else {
+		newlen += max_grow;
}
- *(dest->begin + dest->len) = c;
- dest->len++;
- return 1;
-}
+	/*
+	 * NOTE(review): realloc() is not required to preserve the 16-byte
+	 * alignment requested with posix_memalign() when the string was created.
+	 */
+	nptr = realloc (str, newlen + sizeof (*str));
-/*
- * Push one character to fstr
- */
-gint
-rspamd_fstrappend_u (rspamd_fstring_t * dest, gunichar c)
-{
- int l;
- if (dest->size < dest->len) {
- /* Need to reallocate string */
- return 0;
+	if (nptr == NULL) {
+		/*
+		 * Fail unconditionally: g_assert() is compiled out under
+		 * G_DISABLE_ASSERT, which would let the NULL pointer be
+		 * dereferenced below. g_error() does not return.
+		 */
+		g_error ("%s: cannot grow string to %" G_GSIZE_FORMAT " bytes",
+				G_STRLOC, newlen);
}
- l = g_unichar_to_utf8 (c, dest->begin + dest->len);
- dest->len += l;
- return l;
-}
-
-/*
- * Allocate memory for f_str_t
- */
-rspamd_fstring_t *
-rspamd_fstralloc (rspamd_mempool_t * pool, size_t len)
-{
- rspamd_fstring_t *res = rspamd_mempool_alloc (pool, sizeof (rspamd_fstring_t));
+	str = nptr;
+	str->allocated = newlen;
- res->begin = rspamd_mempool_alloc (pool, len);
-
- res->size = len;
- res->len = 0;
- return res;
-}
-
-/*
- * Allocate memory for f_str_t from temporary pool
- */
-rspamd_fstring_t *
-rspamd_fstralloc_tmp (rspamd_mempool_t * pool, size_t len)
-{
- rspamd_fstring_t *res = rspamd_mempool_alloc_tmp (pool, sizeof (rspamd_fstring_t));
-
- res->begin = rspamd_mempool_alloc_tmp (pool, len);
-
- res->size = len;
- res->len = 0;
- return res;
+	return str;
}
-/*
- * Truncate string to its len
- */
+/*
+ * Append `len` bytes from `in` to `str`, enlarging the string first when its
+ * unused tail is too small. The string may be reallocated, so callers must
+ * continue with the returned pointer rather than the one they passed in.
+ */
rspamd_fstring_t *
-rspamd_fstrtruncate (rspamd_mempool_t * pool, rspamd_fstring_t * orig)
+rspamd_fstring_append (rspamd_fstring_t *str, const char *in, gsize len)
{
- rspamd_fstring_t *res;
+	gchar *tail;
- if (orig == NULL || orig->len == 0 || orig->size <= orig->len) {
- return orig;
+
+	if (len > fstravail (str)) {
+		/* Not enough free space left: get a bigger block */
+		str = rspamd_fstring_grow (str, len);
}
- res = rspamd_fstralloc (pool, orig->len);
- if (res == NULL) {
- return NULL;
- }
- rspamd_fstrcpy (res, orig);
+
+	/* Copy the new bytes right after the ones already stored */
+	tail = str->str + str->len;
+	memcpy (tail, in, len);
+	str->len += len;
- return res;
+	return str;
}
-/*
- * Enlarge string to new size
- */
-rspamd_fstring_t *
-rspamd_fstrgrow (rspamd_mempool_t * pool, rspamd_fstring_t * orig, size_t newlen)
+/*
+ * Remove up to `len` bytes starting at offset `pos`, moving the remaining
+ * tail down to close the gap. A range that runs past the end of the string
+ * is clamped to it; a `pos` at or beyond the current length is a no-op.
+ * The allocation itself is never shrunk.
+ */
+void
+rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len)
{
- rspamd_fstring_t *res;
+	if (pos < str->len) {
- if (orig == NULL || orig->len == 0 || orig->size >= newlen) {
- return orig;
+		/*
+		 * Compare against the remaining length instead of computing
+		 * pos + len, which could wrap around for huge `len` values.
+		 */
+		if (len >= str->len - pos) {
+			/* Fast path: everything from `pos` onwards goes away */
+			str->len = pos;
+		}
+		else {
+			/* Only the bytes located after the erased range are moved */
+			memmove (str->str + pos, str->str + pos + len,
+					str->len - pos - len);
+			str->len -= len;
+		}
}
-
- res = rspamd_fstralloc (pool, newlen);
- if (res == NULL) {
- return NULL;
+	else {
+		/* Nothing to erase */
}
- rspamd_fstrcpy (res, orig);
-
- return res;
}
+char *rspamd_fstring_cstr (const rspamd_fstring_t *str);
+
+/* Compat code */
static guint32
fstrhash_c (gchar c, guint32 hval)
{
return (hval << 3) + (hval >> 29);
}
-/*
- * Return hash value for a string
- */
-guint32
-rspamd_fstrhash (rspamd_fstring_t * str)
-{
- size_t i;
- guint32 hval;
- gchar *c;
-
- if (str == NULL) {
- return 0;
- }
- c = str->begin;
- hval = str->len;
-
- for (i = 0; i < str->len; i++, c++) {
- hval = fstrhash_c (*c, hval);
- }
- return hval;
-}
/*
* Return hash value for a string
*/
guint32
-rspamd_fstrhash_lc (rspamd_fstring_t * str, gboolean is_utf)
+rspamd_fstrhash_lc (const rspamd_fstring_t * str, gboolean is_utf)
{
gsize i;
guint32 j, hval;
return 0;
}
- p = str->begin;
+ p = str->str;
hval = str->len;
if (is_utf) {
- while (end < str->begin + str->len) {
+ while (end < str->str + str->len) {
if (!g_utf8_validate (p, str->len, &end)) {
return rspamd_fstrhash_lc (str, FALSE);
}
return hval;
}
-void
-rspamd_fstrstrip (rspamd_fstring_t * str)
-{
- gchar *p = str->begin;
- guint r = 0;
-
- while (r < str->len) {
- if (g_ascii_isspace (*p)) {
- p++;
- r++;
- }
- else {
- break;
- }
- }
-
- if (r > 0) {
- memmove (str->begin, p, str->len - r);
- str->len -= r;
- }
-
- r = str->len;
- p = str->begin + str->len;
- while (r > 0) {
- if (g_ascii_isspace (*p)) {
- p--;
- r--;
- }
- else {
- break;
- }
- }
-
- str->len = r;
-}
-
gboolean
rspamd_fstring_equal (const rspamd_fstring_t *s1,
const rspamd_fstring_t *s2)
g_assert (s1 != NULL && s2 != NULL);
if (s1->len == s2->len) {
- return (memcmp (s1->begin, s2->begin, s1->len) == 0);
+ return (memcmp (s1->str, s2->str, s1->len) == 0);
}
return FALSE;
/*
- * Functions for handling with fixed size strings
+ * Copyright (c) 2009-2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#ifndef FSTRING_H
#define FSTRING_H
#include "config.h"
#include "mem_pool.h"
-#define update_buf_size(x) (x)->free = (x)->buf->size - \
- ((x)->pos - (x)->buf->begin); (x)->buf->len = (x)->pos - (x)->buf->begin
+/**
+ * Fixed strings library
+ * These strings are NOT null-terminated for speed
+ */
typedef struct f_str_s {
- gchar *begin;
- size_t len;
- size_t size;
+ gsize len; /* Number of payload bytes currently in use */
+ gsize allocated; /* Payload capacity in bytes, not counting this header */
+ gchar str[]; /* Payload stored inline right after the header; not NUL-terminated */
} rspamd_fstring_t;
-typedef struct f_str_buf_s {
- rspamd_fstring_t *buf;
- gchar *pos;
- size_t free;
-} rspamd_fstring_buf_t;
-
-typedef struct f_tok_s {
- rspamd_fstring_t word;
- size_t pos;
-} rspamd_fstring_token_t;
-
-/*
- * Search first occurence of character in string
- */
-ssize_t rspamd_fstrchr (rspamd_fstring_t *src, gchar c);
-
-/*
- * Search last occurence of character in string
- */
-ssize_t rspamd_fstrrchr (rspamd_fstring_t *src, gchar c);
-
-/*
- * Search for pattern in orig
- */
-ssize_t rspamd_fstrstr (rspamd_fstring_t *orig, rspamd_fstring_t *pattern);
+typedef struct f_str_tok {
+ gsize len; /* presumably the number of bytes available at `begin` - TODO confirm against users of this type */
+ const gchar *begin; /* read-only pointer to the first byte; NOTE(review): looks like a non-owning view, confirm */
+} rspamd_ftok_t;
-/*
- * Search for pattern in orig ignoring case
+/**
+ * Create new fixed length string
*/
-ssize_t rspamd_fstrstri (rspamd_fstring_t *orig, rspamd_fstring_t *pattern);
+rspamd_fstring_t* rspamd_fstring_new (void);
-/*
- * Split string by tokens
- * word contains parsed word
+/**
+ * Create new fixed length string with preallocated size
*/
-gint rspamd_fstrtok (rspamd_fstring_t *text, const gchar *sep, rspamd_fstring_token_t *state);
+rspamd_fstring_t *rspamd_fstring_sized_new (gsize initial_size);
-/*
- * Copy one string into other
+/**
+ * Free fixed length string
*/
-size_t rspamd_fstrcpy (rspamd_fstring_t *dest, rspamd_fstring_t *src);
+void rspamd_fstring_free (rspamd_fstring_t *str);
-/*
- * Concatenate two strings
+/**
+ * Append data to a fixed length string
*/
-size_t rspamd_fstrcat (rspamd_fstring_t *dest, rspamd_fstring_t *src);
+rspamd_fstring_t* rspamd_fstring_append (rspamd_fstring_t *str,
+ const char *in, gsize len) G_GNUC_WARN_UNUSED_RESULT;
-/*
- * Push one character to fstr
- */
-gint rspamd_fstrappend_c (rspamd_fstring_t *dest, gchar c);
-/*
- * Push one character to fstr
+/**
+ * Erase `len` characters at position `pos`
*/
-gint rspamd_fstrappend_u (rspamd_fstring_t *dest, gunichar c);
+void rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len);
-/*
- * Allocate memory for f_str_t
+/**
+ * Convert fixed string to a zero terminated string. This string should be
+ * freed by a caller
*/
-rspamd_fstring_t * rspamd_fstralloc (rspamd_mempool_t *pool, size_t len);
-
-/*
- * Allocate memory for f_str_t from temporary pool
- */
-rspamd_fstring_t * rspamd_fstralloc_tmp (rspamd_mempool_t *pool, size_t len);
-
-/*
- * Truncate string to its len
- */
-rspamd_fstring_t * rspamd_fstrtruncate (rspamd_mempool_t *pool, rspamd_fstring_t *orig);
-
-/*
- * Enlarge string to new size
- */
-rspamd_fstring_t * rspamd_fstrgrow (rspamd_mempool_t *pool, rspamd_fstring_t *orig, size_t newlen);
-
-/*
- * Return specified character
- */
-#define fstridx(str, pos) *((str)->begin + (pos))
-
-/*
- * Return fast hash value for fixed string
- */
-guint32 rspamd_fstrhash (rspamd_fstring_t *str);
+char * rspamd_fstring_cstr (const rspamd_fstring_t *str);
/*
* Return fast hash value for fixed string converted to lowercase
*/
-guint32 rspamd_fstrhash_lc (rspamd_fstring_t *str, gboolean is_utf);
-/*
- * Make copy of string to 0-terminated string
- */
-gchar * rspamd_fstr_c_str (rspamd_fstring_t *str, rspamd_mempool_t *pool);
+guint32 rspamd_fstrhash_lc (const rspamd_fstring_t *str, gboolean is_utf);
-/*
- * Strip fstr string from space symbols
+/**
+ * Return true if two strings are equal
*/
-void rspamd_fstrstrip (rspamd_fstring_t *str);
-
gboolean rspamd_fstring_equal (const rspamd_fstring_t *s1,
const rspamd_fstring_t *s2);