Browse Source

Start new fixed strings library.

tags/1.0.5
Vsevolod Stakhov 8 years ago
parent
commit
c7f92da47d
3 changed files with 135 additions and 421 deletions
  1. 76
    331
      src/libutil/fstring.c
  2. 56
    88
      src/libutil/fstring.h
  3. 3
    2
      src/libutil/str_util.c

+ 76
- 331
src/libutil/fstring.c View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2009-2012, Vsevolod Stakhov
* Copyright (c) 2009-2015, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -24,316 +24,118 @@

#include "fstring.h"

/*
* Search first occurrence of character in string
*/
ssize_t
rspamd_fstrchr (rspamd_fstring_t * src, gchar c)
{
register size_t cur = 0;

while (cur < src->len) {
if (*(src->begin + cur) == c) {
return cur;
}
cur++;
}
static const gsize default_initial_size = 48;
/* Maximum size when we double the size of new string */
static const gsize max_grow = 1024 * 1024;

return -1;
}
#define fstravail(s) ((s)->allocated - (s)->len)
static rspamd_fstring_t * rspamd_fstring_grow (rspamd_fstring_t *str,
gsize needed_len) G_GNUC_WARN_UNUSED_RESULT;

/*
 * Search last occurrence of character in string.
 *
 * Returns the zero-based offset of the last byte of `src` equal to `c`,
 * or -1 when `c` does not occur (an empty string always yields -1).
 */
ssize_t
rspamd_fstrrchr (rspamd_fstring_t * src, gchar c)
{
	ssize_t cur = src->len;

	/*
	 * `cur` is one past the byte to examine, so decrement before reading:
	 * this visits indices len-1 .. 0 and never touches begin[len].
	 */
	while (cur > 0) {
		cur--;
		if (*(src->begin + cur) == c) {
			return cur;
		}
	}

	return -1;
}

/*
* Search for pattern in orig
*/
ssize_t
rspamd_fstrstr (rspamd_fstring_t * orig, rspamd_fstring_t * pattern)
rspamd_fstring_t *
rspamd_fstring_new (void)
{
register size_t cur = 0, pcur = 0;
rspamd_fstring_t *s;

if (pattern->len > orig->len) {
return -1;
}

while (cur < orig->len) {
if (*(orig->begin + cur) == *pattern->begin) {
pcur = 0;
while (cur < orig->len && pcur < pattern->len) {
if (*(orig->begin + cur) != *(pattern->begin + pcur)) {
pcur = 0;
break;
}
cur++;
pcur++;
}
return cur - pattern->len;
}
cur++;
}

return -1;
g_assert (posix_memalign ((void**)&s, 16, default_initial_size + sizeof (*s)) == 0);
s->len = 0;
s->allocated = default_initial_size;

return s;
}

/*
 * Search for pattern in orig ignoring case (ASCII case folding only).
 *
 * Returns the zero-based offset of the first match of `pattern` inside
 * `orig`, or -1 when there is no match or the pattern is longer than the
 * text. An empty pattern matches at offset 0.
 */
ssize_t
rspamd_fstrstri (rspamd_fstring_t * orig, rspamd_fstring_t * pattern)
{
	size_t start, pcur, last;

	if (pattern->len > orig->len) {
		return -1;
	}

	/* Last offset at which the whole pattern still fits into orig */
	last = orig->len - pattern->len;

	for (start = 0; start <= last; start++) {
		for (pcur = 0; pcur < pattern->len; pcur++) {
			if (g_ascii_tolower (*(orig->begin + start + pcur)) !=
				g_ascii_tolower (*(pattern->begin + pcur))) {
				break;
			}
		}

		/* Report a hit only if every pattern byte was compared equal */
		if (pcur == pattern->len) {
			return start;
		}
	}

	return -1;
}

/*
* Split string by tokens
* word contains parsed word
*
* Return: -1 - no new words can be extracted
* 1 - word was extracted and there are more words
* 0 - last word extracted
*/
gint
rspamd_fstrtok (rspamd_fstring_t * text, const gchar *sep, rspamd_fstring_token_t * state)
rspamd_fstring_t *
rspamd_fstring_sized_new (gsize initial_size)
{
register size_t cur;
const gchar *csep = sep;

if (state->pos >= text->len) {
return -1;
}
rspamd_fstring_t *s;
gsize real_size = MAX(default_initial_size, initial_size);

cur = state->pos;
g_assert (posix_memalign ((void **)&s, 16, real_size + sizeof (*s)) == 0);
s->len = 0;
s->allocated = real_size;

while (cur < text->len) {
while (*csep) {
if (*(text->begin + cur) == *csep) {
state->word.begin = (text->begin + state->pos);
state->word.len = cur - state->pos;
state->pos = cur + 1;
return 1;
}
csep++;
}
csep = sep;
cur++;
}

/* Last word */
state->word.begin = (text->begin + state->pos);
state->word.len = cur - state->pos;
state->pos = cur;

return 0;
return s;
}

/*
* Copy one string into other
*/
size_t
rspamd_fstrcpy (rspamd_fstring_t * dest, rspamd_fstring_t * src)
void
rspamd_fstring_free (rspamd_fstring_t *str)
{
register size_t cur = 0;

if (dest->size < src->len) {
return 0;
}

while (cur < src->len && cur < dest->size) {
*(dest->begin + cur) = *(src->begin + cur);
cur++;
}

return cur;
free (str);
}

/*
* Concatenate two strings
*/
size_t
rspamd_fstrcat (rspamd_fstring_t * dest, rspamd_fstring_t * src)
static rspamd_fstring_t *
rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len)
{
register size_t cur = 0;
gchar *p = dest->begin + dest->len;
gsize newlen;
gpointer nptr;

if (dest->size < src->len + dest->len) {
return 0;
}
newlen = str->len + needed_len;

while (cur < src->len) {
*p = *(src->begin + cur);
p++;
cur++;
/*
* Stop exponential grow at some point, since it might be slow for the
* vast majority of cases
*/
if (newlen < max_grow) {
newlen *= 2;
}

dest->len += src->len;

return cur;

}

/*
 * Produce a 0-terminated copy of a fixed string.
 *
 * The copy is allocated from `pool` and is str->len + 1 bytes long.
 */
gchar *
rspamd_fstr_c_str (rspamd_fstring_t * str, rspamd_mempool_t * pool)
{
	const size_t nbytes = str->len;
	gchar *cstr = rspamd_mempool_alloc (pool, nbytes + 1);

	/*
	 * memccpy stops right after the first '\0' it copies, so at most one
	 * embedded zero byte from the source ends up in the result.
	 */
	memccpy (cstr, str->begin, '\0', nbytes);
	cstr[nbytes] = 0;

	return cstr;
}

/*
* Push one character to fstr
*/
gint
rspamd_fstrappend_c (rspamd_fstring_t * dest, gchar c)
{
if (dest->size < dest->len) {
/* Need to reallocate string */
return 0;
else {
newlen += max_grow;
}

*(dest->begin + dest->len) = c;
dest->len++;
return 1;
}
nptr = realloc (str, newlen + sizeof (*str));

/*
* Push one character to fstr
*/
gint
rspamd_fstrappend_u (rspamd_fstring_t * dest, gunichar c)
{
int l;
if (dest->size < dest->len) {
/* Need to reallocate string */
return 0;
if (nptr == NULL) {
/* Avoid memory leak */
free (str);
g_assert (nptr);
}

l = g_unichar_to_utf8 (c, dest->begin + dest->len);
dest->len += l;
return l;
}

/*
* Allocate memory for f_str_t
*/
rspamd_fstring_t *
rspamd_fstralloc (rspamd_mempool_t * pool, size_t len)
{
rspamd_fstring_t *res = rspamd_mempool_alloc (pool, sizeof (rspamd_fstring_t));
str = nptr;
str->allocated = newlen;

res->begin = rspamd_mempool_alloc (pool, len);

res->size = len;
res->len = 0;
return res;
}

/*
* Allocate memory for f_str_t from temporary pool
*/
rspamd_fstring_t *
rspamd_fstralloc_tmp (rspamd_mempool_t * pool, size_t len)
{
rspamd_fstring_t *res = rspamd_mempool_alloc_tmp (pool, sizeof (rspamd_fstring_t));

res->begin = rspamd_mempool_alloc_tmp (pool, len);

res->size = len;
res->len = 0;
return res;
return str;
}

/*
* Truncate string to its len
*/
rspamd_fstring_t *
rspamd_fstrtruncate (rspamd_mempool_t * pool, rspamd_fstring_t * orig)
rspamd_fstring_append (rspamd_fstring_t *str, const char *in, gsize len)
{
rspamd_fstring_t *res;
gsize avail = fstravail (str);

if (orig == NULL || orig->len == 0 || orig->size <= orig->len) {
return orig;
if (avail < len) {
str = rspamd_fstring_grow (str, len);
}

res = rspamd_fstralloc (pool, orig->len);
if (res == NULL) {
return NULL;
}
rspamd_fstrcpy (res, orig);
memcpy (str->str + str->len, in, len);
str->len += len;

return res;
return str;
}

/*
* Enlarge string to new size
*/
rspamd_fstring_t *
rspamd_fstrgrow (rspamd_mempool_t * pool, rspamd_fstring_t * orig, size_t newlen)
void
rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len)
{
rspamd_fstring_t *res;
if (pos < str->len) {
if (pos + len > str->len) {
len = str->len - pos;
}

if (orig == NULL || orig->len == 0 || orig->size >= newlen) {
return orig;
if (len == str->len - pos) {
/* Fast path */
str->len = pos;
}
else {
memmove (str->str + pos, str->str + pos + len, str->len - pos);
str->len -= pos;
}
}

res = rspamd_fstralloc (pool, newlen);
if (res == NULL) {
return NULL;
else {
/* Do nothing */
}
rspamd_fstrcpy (res, orig);

return res;
}

char *rspamd_fstring_cstr (const rspamd_fstring_t *str);

/* Compat code */
static guint32
fstrhash_c (gchar c, guint32 hval)
{
@@ -362,33 +164,12 @@ fstrhash_c (gchar c, guint32 hval)
return (hval << 3) + (hval >> 29);
}

/*
 * Return hash value for a string.
 *
 * The hash starts from the string length and folds in every byte through
 * fstrhash_c. A NULL string hashes to 0.
 */
guint32
rspamd_fstrhash (rspamd_fstring_t * str)
{
	guint32 hval;
	size_t idx = 0;

	if (str == NULL) {
		return 0;
	}

	hval = str->len;

	while (idx < str->len) {
		hval = fstrhash_c (str->begin[idx], hval);
		idx++;
	}

	return hval;
}

/*
* Return hash value for a string
*/
guint32
rspamd_fstrhash_lc (rspamd_fstring_t * str, gboolean is_utf)
rspamd_fstrhash_lc (const rspamd_fstring_t * str, gboolean is_utf)
{
gsize i;
guint32 j, hval;
@@ -400,11 +181,11 @@ rspamd_fstrhash_lc (rspamd_fstring_t * str, gboolean is_utf)
return 0;
}

p = str->begin;
p = str->str;
hval = str->len;

if (is_utf) {
while (end < str->begin + str->len) {
while (end < str->str + str->len) {
if (!g_utf8_validate (p, str->len, &end)) {
return rspamd_fstrhash_lc (str, FALSE);
}
@@ -431,42 +212,6 @@ rspamd_fstrhash_lc (rspamd_fstring_t * str, gboolean is_utf)
return hval;
}

/*
 * Strip leading and trailing ASCII whitespace from a fixed string in place.
 *
 * The remaining bytes are moved to the start of str->begin and str->len is
 * reduced accordingly; a string made only of whitespace ends up with len 0.
 */
void
rspamd_fstrstrip (rspamd_fstring_t * str)
{
	size_t lead = 0, len;

	/* Count leading whitespace */
	while (lead < str->len && g_ascii_isspace (str->begin[lead])) {
		lead++;
	}

	if (lead > 0) {
		/* Source and destination overlap, hence memmove */
		memmove (str->begin, str->begin + lead, str->len - lead);
		str->len -= lead;
	}

	/*
	 * Trim the tail: always look at the last valid byte (len - 1), never at
	 * begin[len], which lies outside of the string.
	 */
	len = str->len;
	while (len > 0 && g_ascii_isspace (str->begin[len - 1])) {
		len--;
	}

	str->len = len;
}

gboolean
rspamd_fstring_equal (const rspamd_fstring_t *s1,
const rspamd_fstring_t *s2)
@@ -474,7 +219,7 @@ rspamd_fstring_equal (const rspamd_fstring_t *s1,
g_assert (s1 != NULL && s2 != NULL);

if (s1->len == s2->len) {
return (memcmp (s1->begin, s2->begin, s1->len) == 0);
return (memcmp (s1->str, s2->str, s1->len) == 0);
}

return FALSE;

+ 56
- 88
src/libutil/fstring.h View File

@@ -1,123 +1,91 @@
/*
* Functions for handling fixed-size strings
* Copyright (c) 2009-2015, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


#ifndef FSTRING_H
#define FSTRING_H

#include "config.h"
#include "mem_pool.h"

/*
 * Resynchronise the bookkeeping of a buffer `x` with its cursor:
 * sets x->free to the buffer size minus the cursor offset from
 * x->buf->begin, and sets x->buf->len to that cursor offset.
 * NOTE: expands to two separate statements and evaluates `x` several
 * times, so it must not be used as the body of an unbraced if/else/loop
 * or with an argument that has side effects.
 */
#define update_buf_size(x) (x)->free = (x)->buf->size - \
((x)->pos - (x)->buf->begin); (x)->buf->len = (x)->pos - (x)->buf->begin
/**
* Fixed strings library
* These strings are NOT null-terminated for speed
*/

typedef struct f_str_s {
gchar *begin;
size_t len;
size_t size;
gsize len;
gsize allocated;
gchar str[];
} rspamd_fstring_t;

/*
 * A fixed string together with a cursor into its storage.
 * The update_buf_size macro recomputes `free` and buf->len from `pos`.
 */
typedef struct f_str_buf_s {
/* Underlying fixed string */
rspamd_fstring_t *buf;
/* Cursor; update_buf_size measures it as an offset from buf->begin */
gchar *pos;
/* buf->size minus the cursor offset, as computed by update_buf_size */
size_t free;
} rspamd_fstring_buf_t;

/*
 * Iteration state for rspamd_fstrtok.
 */
typedef struct f_tok_s {
/*
 * Last extracted word; rspamd_fstrtok points word.begin into the scanned
 * text (no copy is made) and sets word.len to the word length
 */
rspamd_fstring_t word;
/*
 * Offset in the text where the next scan starts
 * (presumably initialised to 0 by the caller; TODO confirm)
 */
size_t pos;
} rspamd_fstring_token_t;

/*
* Search first occurrence of character in string
*/
ssize_t rspamd_fstrchr (rspamd_fstring_t *src, gchar c);

/*
* Search last occurrence of character in string
*/
ssize_t rspamd_fstrrchr (rspamd_fstring_t *src, gchar c);

/*
* Search for pattern in orig
*/
ssize_t rspamd_fstrstr (rspamd_fstring_t *orig, rspamd_fstring_t *pattern);
/*
 * Token described by a length and a pointer to constant characters.
 * NOTE(review): presumably a non-owning view into another buffer that is
 * not zero-terminated; confirm against the users of rspamd_ftok_t.
 */
typedef struct f_str_tok {
/* Number of bytes in the token */
gsize len;
/* First byte of the token; the data is not modified through this pointer */
const gchar *begin;
} rspamd_ftok_t;

/*
* Search for pattern in orig ignoring case
/**
* Create new fixed length string
*/
ssize_t rspamd_fstrstri (rspamd_fstring_t *orig, rspamd_fstring_t *pattern);
rspamd_fstring_t* rspamd_fstring_new (void);

/*
* Split string by tokens
* word contains parsed word
/**
* Create new fixed length string with preallocated size
*/
gint rspamd_fstrtok (rspamd_fstring_t *text, const gchar *sep, rspamd_fstring_token_t *state);
rspamd_fstring_t *rspamd_fstring_sized_new (gsize initial_size);

/*
* Copy one string into other
/**
* Free fixed length string
*/
size_t rspamd_fstrcpy (rspamd_fstring_t *dest, rspamd_fstring_t *src);
void rspamd_fstring_free (rspamd_fstring_t *str);

/*
* Concatenate two strings
/**
* Append data to a fixed length string
*/
size_t rspamd_fstrcat (rspamd_fstring_t *dest, rspamd_fstring_t *src);
rspamd_fstring_t* rspamd_fstring_append (rspamd_fstring_t *str,
const char *in, gsize len) G_GNUC_WARN_UNUSED_RESULT;

/*
* Push one character to fstr
*/
gint rspamd_fstrappend_c (rspamd_fstring_t *dest, gchar c);

/*
* Push one character to fstr
/**
* Erase `len` characters at position `pos`
*/
gint rspamd_fstrappend_u (rspamd_fstring_t *dest, gunichar c);
void rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len);

/*
* Allocate memory for f_str_t
/**
* Convert fixed string to a zero terminated string. This string should be
* freed by a caller
*/
rspamd_fstring_t * rspamd_fstralloc (rspamd_mempool_t *pool, size_t len);

/*
* Allocate memory for f_str_t from temporary pool
*/
rspamd_fstring_t * rspamd_fstralloc_tmp (rspamd_mempool_t *pool, size_t len);

/*
* Truncate string to its len
*/
rspamd_fstring_t * rspamd_fstrtruncate (rspamd_mempool_t *pool, rspamd_fstring_t *orig);

/*
* Enlarge string to new size
*/
rspamd_fstring_t * rspamd_fstrgrow (rspamd_mempool_t *pool, rspamd_fstring_t *orig, size_t newlen);

/*
 * Return specified character: the byte at offset `pos` from (str)->begin.
 * The expansion is an lvalue and is fully parenthesised so that it can be
 * combined safely with postfix and other operators. No bounds checking is
 * performed.
 */
#define fstridx(str, pos) (*((str)->begin + (pos)))

/*
* Return fast hash value for fixed string
*/
guint32 rspamd_fstrhash (rspamd_fstring_t *str);
char * rspamd_fstring_cstr (const rspamd_fstring_t *str);

/*
* Return fast hash value for fixed string converted to lowercase
*/
guint32 rspamd_fstrhash_lc (rspamd_fstring_t *str, gboolean is_utf);
/*
* Make copy of string to 0-terminated string
*/
gchar * rspamd_fstr_c_str (rspamd_fstring_t *str, rspamd_mempool_t *pool);
guint32 rspamd_fstrhash_lc (const rspamd_fstring_t *str, gboolean is_utf);

/*
* Strip fstr string from space symbols
/**
* Return true if two strings are equal
*/
void rspamd_fstrstrip (rspamd_fstring_t *str);

gboolean rspamd_fstring_equal (const rspamd_fstring_t *s1,
const rspamd_fstring_t *s2);


+ 3
- 2
src/libutil/str_util.c View File

@@ -218,8 +218,9 @@ gboolean
rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2)
{
const rspamd_fstring_t *f1 = v, *f2 = v2;

if (f1->len == f2->len &&
g_ascii_strncasecmp (f1->begin, f2->begin, f1->len) == 0) {
g_ascii_strncasecmp (f1->str, f2->str, f1->len) == 0) {
return TRUE;
}

@@ -232,7 +233,7 @@ rspamd_fstring_icase_hash (gconstpointer key)
{
const rspamd_fstring_t *f = key;

return rspamd_icase_hash (f->begin, f->len);
return rspamd_icase_hash (f->str, f->len);
}

gboolean

Loading…
Cancel
Save