You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fstring.c 7.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "fstring.h"
  17. #include "str_util.h"
  18. static const gsize default_initial_size = 48;
  19. /* Maximum size when we double the size of new string */
  20. static const gsize max_grow = 1024 * 1024;
  21. #define fstravail(s) ((s)->allocated - (s)->len)
  22. static rspamd_fstring_t * rspamd_fstring_grow (rspamd_fstring_t *str,
  23. gsize needed_len) G_GNUC_WARN_UNUSED_RESULT;
  24. rspamd_fstring_t *
  25. rspamd_fstring_new (void)
  26. {
  27. rspamd_fstring_t *s;
  28. g_assert (posix_memalign ((void**)&s, 16, default_initial_size + sizeof (*s)) == 0);
  29. s->len = 0;
  30. s->allocated = default_initial_size;
  31. return s;
  32. }
  33. rspamd_fstring_t *
  34. rspamd_fstring_sized_new (gsize initial_size)
  35. {
  36. rspamd_fstring_t *s;
  37. gsize real_size = MAX (default_initial_size, initial_size);
  38. g_assert (posix_memalign ((void **)&s, 16, real_size + sizeof (*s)) == 0);
  39. s->len = 0;
  40. s->allocated = real_size;
  41. return s;
  42. }
  43. rspamd_fstring_t *
  44. rspamd_fstring_new_init (const gchar *init, gsize len)
  45. {
  46. rspamd_fstring_t *s;
  47. gsize real_size = MAX (default_initial_size, len);
  48. g_assert (posix_memalign ((void **) &s, 16, real_size + sizeof (*s)) == 0);
  49. s->len = len;
  50. s->allocated = real_size;
  51. memcpy (s->str, init, len);
  52. return s;
  53. }
  54. rspamd_fstring_t *
  55. rspamd_fstring_assign (rspamd_fstring_t *str, const gchar *init, gsize len)
  56. {
  57. gsize avail = str->allocated;
  58. if (avail < len) {
  59. str = rspamd_fstring_grow (str, len);
  60. }
  61. if (len > 0) {
  62. memcpy (str->str, init, len);
  63. }
  64. str->len = len;
  65. return str;
  66. }
  67. void
  68. rspamd_fstring_free (rspamd_fstring_t *str)
  69. {
  70. free (str);
  71. }
  72. static rspamd_fstring_t *
  73. rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len)
  74. {
  75. gsize newlen;
  76. gpointer nptr;
  77. newlen = str->len + needed_len;
  78. /*
  79. * Stop exponential grow at some point, since it might be slow for the
  80. * vast majority of cases
  81. */
  82. if (newlen < max_grow) {
  83. newlen *= 2;
  84. }
  85. else {
  86. newlen += max_grow;
  87. }
  88. nptr = realloc (str, newlen + sizeof (*str));
  89. if (nptr == NULL) {
  90. /* Avoid memory leak */
  91. free (str);
  92. g_assert (nptr);
  93. }
  94. str = nptr;
  95. str->allocated = newlen;
  96. return str;
  97. }
  98. rspamd_fstring_t *
  99. rspamd_fstring_append (rspamd_fstring_t *str, const char *in, gsize len)
  100. {
  101. gsize avail = fstravail (str);
  102. if (avail < len) {
  103. str = rspamd_fstring_grow (str, len);
  104. }
  105. memcpy (str->str + str->len, in, len);
  106. str->len += len;
  107. return str;
  108. }
  109. rspamd_fstring_t *
  110. rspamd_fstring_append_chars (rspamd_fstring_t *str,
  111. char c, gsize len)
  112. {
  113. gsize avail = fstravail (str);
  114. if (avail < len) {
  115. str = rspamd_fstring_grow (str, len);
  116. }
  117. memset (str->str + str->len, c, len);
  118. str->len += len;
  119. return str;
  120. }
  121. void
  122. rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len)
  123. {
  124. if (pos < str->len) {
  125. if (pos + len > str->len) {
  126. len = str->len - pos;
  127. }
  128. if (len == str->len - pos) {
  129. /* Fast path */
  130. str->len = pos;
  131. }
  132. else {
  133. memmove (str->str + pos, str->str + pos + len, str->len - pos);
  134. str->len -= pos;
  135. }
  136. }
  137. else {
  138. /* Do nothing */
  139. }
  140. }
  141. char *rspamd_fstring_cstr (const rspamd_fstring_t *str);
  142. /* Compat code */
  143. static guint32
  144. fstrhash_c (gchar c, guint32 hval)
  145. {
  146. guint32 tmp;
  147. /*
  148. * xor in the current byte against each byte of hval
  149. * (which alone gaurantees that every bit of input will have
  150. * an effect on the output)
  151. */
  152. tmp = c & 0xFF;
  153. tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24);
  154. hval ^= tmp;
  155. /* add some bits out of the middle as low order bits */
  156. hval = hval + ((hval >> 12) & 0x0000ffff);
  157. /* swap most and min significative bytes */
  158. tmp = (hval << 24) | ((hval >> 24) & 0xff);
  159. /* zero most and min significative bytes of hval */
  160. hval &= 0x00ffff00;
  161. hval |= tmp;
  162. /*
  163. * rotate hval 3 bits to the left (thereby making the
  164. * 3rd msb of the above mess the hsb of the output hash)
  165. */
  166. return (hval << 3) + (hval >> 29);
  167. }
  168. /*
  169. * Return hash value for a string
  170. */
  171. guint32
  172. rspamd_fstrhash_lc (const rspamd_ftok_t * str, gboolean is_utf)
  173. {
  174. gsize i;
  175. guint32 j, hval;
  176. const gchar *p, *end = NULL;
  177. gchar t;
  178. gunichar uc;
  179. if (str == NULL) {
  180. return 0;
  181. }
  182. p = str->begin;
  183. hval = str->len;
  184. if (is_utf) {
  185. while (end < str->begin + str->len) {
  186. if (!g_utf8_validate (p, str->len, &end)) {
  187. return rspamd_fstrhash_lc (str, FALSE);
  188. }
  189. while (p < end) {
  190. uc = g_unichar_tolower (g_utf8_get_char (p));
  191. for (j = 0; j < sizeof (gunichar); j++) {
  192. t = (uc >> (j * 8)) & 0xff;
  193. if (t != 0) {
  194. hval = fstrhash_c (t, hval);
  195. }
  196. }
  197. p = g_utf8_next_char (p);
  198. }
  199. p = end + 1;
  200. }
  201. }
  202. else {
  203. for (i = 0; i < str->len; i++, p++) {
  204. hval = fstrhash_c (g_ascii_tolower (*p), hval);
  205. }
  206. }
  207. return hval;
  208. }
  209. gboolean
  210. rspamd_fstring_equal (const rspamd_fstring_t *s1,
  211. const rspamd_fstring_t *s2)
  212. {
  213. g_assert (s1 != NULL && s2 != NULL);
  214. if (s1->len == s2->len) {
  215. return (memcmp (s1->str, s2->str, s1->len) == 0);
  216. }
  217. return FALSE;
  218. }
  219. gint
  220. rspamd_fstring_casecmp (const rspamd_fstring_t *s1,
  221. const rspamd_fstring_t *s2)
  222. {
  223. gint ret = 0;
  224. g_assert (s1 != NULL && s2 != NULL);
  225. if (s1->len == s2->len) {
  226. ret = rspamd_lc_cmp (s1->str, s2->str, s1->len);
  227. }
  228. else {
  229. ret = s1->len - s2->len;
  230. }
  231. return ret;
  232. }
  233. gint
  234. rspamd_fstring_cmp (const rspamd_fstring_t *s1,
  235. const rspamd_fstring_t *s2)
  236. {
  237. g_assert (s1 != NULL && s2 != NULL);
  238. if (s1->len == s2->len) {
  239. return memcmp (s1->str, s2->str, s1->len);
  240. }
  241. return s1->len - s2->len;
  242. }
  243. gint
  244. rspamd_ftok_casecmp (const rspamd_ftok_t *s1,
  245. const rspamd_ftok_t *s2)
  246. {
  247. gint ret = 0;
  248. g_assert (s1 != NULL && s2 != NULL);
  249. if (s1->len == s2->len) {
  250. ret = rspamd_lc_cmp (s1->begin, s2->begin, s1->len);
  251. }
  252. else {
  253. ret = s1->len - s2->len;
  254. }
  255. return ret;
  256. }
  257. gint
  258. rspamd_ftok_cmp (const rspamd_ftok_t *s1,
  259. const rspamd_ftok_t *s2)
  260. {
  261. g_assert (s1 != NULL && s2 != NULL);
  262. if (s1->len == s2->len) {
  263. return memcmp (s1->begin, s2->begin, s1->len);
  264. }
  265. return s1->len - s2->len;
  266. }
  267. void
  268. rspamd_fstring_mapped_ftok_free (gpointer p)
  269. {
  270. rspamd_ftok_t *tok = p;
  271. rspamd_fstring_t *storage;
  272. storage = (rspamd_fstring_t *) (tok->begin - 2 * sizeof (gsize));
  273. rspamd_fstring_free (storage);
  274. g_slice_free1 (sizeof (*tok), tok);
  275. }
  276. rspamd_ftok_t *
  277. rspamd_ftok_map (const rspamd_fstring_t *s)
  278. {
  279. rspamd_ftok_t *tok;
  280. g_assert (s != NULL);
  281. tok = g_slice_alloc (sizeof (*tok));
  282. tok->begin = s->str;
  283. tok->len = s->len;
  284. return tok;
  285. }
  286. char *
  287. rspamd_fstring_cstr (const rspamd_fstring_t *s)
  288. {
  289. char *result;
  290. if (s == NULL) {
  291. return NULL;
  292. }
  293. result = g_malloc (s->len + 1);
  294. memcpy (result, s->str, s->len);
  295. result[s->len] = '\0';
  296. return result;
  297. }
  298. gboolean
  299. rspamd_ftok_cstr_equal (const rspamd_ftok_t *s, const gchar *pat,
  300. gboolean icase)
  301. {
  302. gsize slen;
  303. rspamd_ftok_t srch;
  304. g_assert (s != NULL);
  305. g_assert (pat != NULL);
  306. slen = strlen (pat);
  307. srch.begin = pat;
  308. srch.len = slen;
  309. if (icase) {
  310. return (rspamd_ftok_casecmp (s, &srch) == 0);
  311. }
  312. return (rspamd_ftok_cmp (s, &srch) == 0);
  313. }