You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

fstring.c 8.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "fstring.h"
  17. #include "str_util.h"
  18. #include "contrib/fastutf8/fastutf8.h"
  19. #include "contrib/mumhash/mum.h"
  20. #ifdef WITH_JEMALLOC
  21. #include <jemalloc/jemalloc.h>
  22. #if (JEMALLOC_VERSION_MAJOR == 3 && JEMALLOC_VERSION_MINOR >= 6) || (JEMALLOC_VERSION_MAJOR > 3)
  23. #define HAVE_MALLOC_SIZE 1
  24. #define sys_alloc_size(sz) nallocx(sz, 0)
  25. #endif
  26. #elif defined(__APPLE__)
  27. #include <malloc/malloc.h>
  28. #define HAVE_MALLOC_SIZE 1
  29. #define sys_alloc_size(sz) malloc_good_size(sz)
  30. #endif
  31. static const gsize default_initial_size = 16;
  32. #define fstravail(s) ((s)->allocated - (s)->len)
  33. rspamd_fstring_t *
  34. rspamd_fstring_new(void)
  35. {
  36. rspamd_fstring_t *s;
  37. if ((s = malloc(default_initial_size + sizeof(*s))) == NULL) {
  38. g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
  39. G_STRLOC, default_initial_size + sizeof(*s));
  40. return NULL;
  41. }
  42. s->len = 0;
  43. s->allocated = default_initial_size;
  44. return s;
  45. }
  46. rspamd_fstring_t *
  47. rspamd_fstring_sized_new(gsize initial_size)
  48. {
  49. rspamd_fstring_t *s;
  50. gsize real_size = MAX(default_initial_size, initial_size);
  51. if ((s = malloc(real_size + sizeof(*s))) == NULL) {
  52. g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
  53. G_STRLOC, real_size + sizeof(*s));
  54. return NULL;
  55. }
  56. s->len = 0;
  57. s->allocated = real_size;
  58. return s;
  59. }
  60. rspamd_fstring_t *
  61. rspamd_fstring_new_init(const gchar *init, gsize len)
  62. {
  63. rspamd_fstring_t *s;
  64. gsize real_size = MAX(default_initial_size, len);
  65. if ((s = malloc(real_size + sizeof(*s))) == NULL) {
  66. g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
  67. G_STRLOC, real_size + sizeof(*s));
  68. abort();
  69. }
  70. s->len = len;
  71. s->allocated = real_size;
  72. memcpy(s->str, init, len);
  73. return s;
  74. }
  75. rspamd_fstring_t *
  76. rspamd_fstring_assign(rspamd_fstring_t *str, const gchar *init, gsize len)
  77. {
  78. gsize avail;
  79. if (str == NULL) {
  80. return rspamd_fstring_new_init(init, len);
  81. }
  82. avail = fstravail(str);
  83. if (avail < len) {
  84. str = rspamd_fstring_grow(str, len);
  85. }
  86. if (len > 0) {
  87. memcpy(str->str, init, len);
  88. }
  89. str->len = len;
  90. return str;
  91. }
  92. void rspamd_fstring_free(rspamd_fstring_t *str)
  93. {
  94. free(str);
  95. }
  96. inline gsize
  97. rspamd_fstring_suggest_size(gsize len, gsize allocated, gsize needed_len)
  98. {
  99. gsize newlen, optlen = 0;
  100. if (allocated < 4096) {
  101. newlen = MAX(len + needed_len, allocated * 2);
  102. }
  103. else {
  104. newlen = MAX(len + needed_len, 1 + allocated * 3 / 2);
  105. }
  106. #ifdef HAVE_MALLOC_SIZE
  107. optlen = sys_alloc_size(newlen + sizeof(rspamd_fstring_t));
  108. #endif
  109. return MAX(newlen, optlen);
  110. }
  111. rspamd_fstring_t *
  112. rspamd_fstring_grow(rspamd_fstring_t *str, gsize needed_len)
  113. {
  114. gsize newlen;
  115. gpointer nptr;
  116. newlen = rspamd_fstring_suggest_size(str->len, str->allocated, needed_len);
  117. nptr = realloc(str, newlen + sizeof(*str));
  118. if (nptr == NULL) {
  119. /* Avoid memory leak */
  120. free(str);
  121. g_error("%s: failed to re-allocate %" G_GSIZE_FORMAT " bytes",
  122. G_STRLOC, newlen + sizeof(*str));
  123. abort();
  124. }
  125. str = nptr;
  126. str->allocated = newlen;
  127. return str;
  128. }
  129. rspamd_fstring_t *
  130. rspamd_fstring_append(rspamd_fstring_t *str, const char *in, gsize len)
  131. {
  132. if (str == NULL) {
  133. str = rspamd_fstring_new_init(in, len);
  134. }
  135. else {
  136. gsize avail = fstravail(str);
  137. if (avail < len) {
  138. str = rspamd_fstring_grow(str, len);
  139. }
  140. memcpy(str->str + str->len, in, len);
  141. str->len += len;
  142. }
  143. return str;
  144. }
  145. rspamd_fstring_t *
  146. rspamd_fstring_append_chars(rspamd_fstring_t *str,
  147. char c, gsize len)
  148. {
  149. if (str == NULL) {
  150. str = rspamd_fstring_sized_new(len);
  151. memset(str->str + str->len, c, len);
  152. str->len += len;
  153. }
  154. else {
  155. gsize avail = fstravail(str);
  156. if (avail < len) {
  157. str = rspamd_fstring_grow(str, len);
  158. }
  159. memset(str->str + str->len, c, len);
  160. str->len += len;
  161. }
  162. return str;
  163. }
  164. void rspamd_fstring_erase(rspamd_fstring_t *str, gsize pos, gsize len)
  165. {
  166. if (pos < str->len) {
  167. if (pos + len > str->len) {
  168. len = str->len - pos;
  169. }
  170. if (len == str->len - pos) {
  171. /* Fast path */
  172. str->len = pos;
  173. }
  174. else {
  175. memmove(str->str + pos, str->str + pos + len, str->len - pos);
  176. str->len -= pos;
  177. }
  178. }
  179. else {
  180. /* Do nothing */
  181. }
  182. }
  183. /* Compat code */
  184. static uint64_t
  185. fstrhash_c(uint64_t c, uint64_t hval)
  186. {
  187. return mum_hash_step(hval, c);
  188. }
  189. /*
  190. * Return hash value for a string
  191. */
  192. uint32_t
  193. rspamd_fstrhash_lc(const rspamd_ftok_t *str, gboolean is_utf)
  194. {
  195. gsize i;
  196. uint64_t hval;
  197. const gchar *p, *end = NULL;
  198. gunichar uc;
  199. if (str == NULL) {
  200. return 0;
  201. }
  202. p = str->begin;
  203. hval = str->len;
  204. end = p + str->len;
  205. if (is_utf) {
  206. if (rspamd_fast_utf8_validate(p, str->len) != 0) {
  207. return rspamd_fstrhash_lc(str, FALSE);
  208. }
  209. while (p < end) {
  210. uc = g_unichar_tolower(g_utf8_get_char(p));
  211. hval = fstrhash_c(uc, hval);
  212. p = g_utf8_next_char(p);
  213. }
  214. }
  215. else {
  216. gsize large_steps = str->len / sizeof(uint64_t);
  217. for (i = 0; i < large_steps; i++, p += sizeof(uint64_t)) {
  218. /* Copy to the uint64 lowercasing each byte */
  219. union {
  220. char c[sizeof(uint64_t)];
  221. uint64_t iu64;
  222. } t;
  223. for (int j = 0; j < sizeof(uint64_t); j++) {
  224. t.c[j] = g_ascii_tolower(p[j]);
  225. }
  226. hval = fstrhash_c(t.iu64, hval);
  227. }
  228. gsize remain = str->len % sizeof(uint64_t);
  229. for (i = 0; i < remain; i++, p++) {
  230. hval = fstrhash_c(g_ascii_tolower(*p), hval);
  231. }
  232. }
  233. return hval;
  234. }
  235. gboolean
  236. rspamd_fstring_equal(const rspamd_fstring_t *s1,
  237. const rspamd_fstring_t *s2)
  238. {
  239. g_assert(s1 != NULL && s2 != NULL);
  240. if (s1->len == s2->len) {
  241. return (memcmp(s1->str, s2->str, s1->len) == 0);
  242. }
  243. return FALSE;
  244. }
  245. gint rspamd_fstring_casecmp(const rspamd_fstring_t *s1,
  246. const rspamd_fstring_t *s2)
  247. {
  248. gint ret = 0;
  249. g_assert(s1 != NULL && s2 != NULL);
  250. if (s1->len == s2->len) {
  251. ret = rspamd_lc_cmp(s1->str, s2->str, s1->len);
  252. }
  253. else {
  254. ret = s1->len - s2->len;
  255. }
  256. return ret;
  257. }
  258. gint rspamd_fstring_cmp(const rspamd_fstring_t *s1,
  259. const rspamd_fstring_t *s2)
  260. {
  261. g_assert(s1 != NULL && s2 != NULL);
  262. if (s1->len == s2->len) {
  263. return memcmp(s1->str, s2->str, s1->len);
  264. }
  265. return s1->len - s2->len;
  266. }
  267. gint rspamd_ftok_casecmp(const rspamd_ftok_t *s1,
  268. const rspamd_ftok_t *s2)
  269. {
  270. gint ret = 0;
  271. g_assert(s1 != NULL && s2 != NULL);
  272. if (s1->len == s2->len) {
  273. ret = rspamd_lc_cmp(s1->begin, s2->begin, s1->len);
  274. }
  275. else {
  276. ret = s1->len - s2->len;
  277. }
  278. return ret;
  279. }
  280. gint rspamd_ftok_cmp(const rspamd_ftok_t *s1,
  281. const rspamd_ftok_t *s2)
  282. {
  283. g_assert(s1 != NULL && s2 != NULL);
  284. if (s1->len == s2->len) {
  285. return memcmp(s1->begin, s2->begin, s1->len);
  286. }
  287. return s1->len - s2->len;
  288. }
  289. gboolean
  290. rspamd_ftok_starts_with(const rspamd_ftok_t *s1,
  291. const rspamd_ftok_t *s2)
  292. {
  293. g_assert(s1 != NULL && s2 != NULL);
  294. if (s1->len >= s2->len) {
  295. return !!(memcmp(s1->begin, s2->begin, s2->len) == 0);
  296. }
  297. return FALSE;
  298. }
  299. void rspamd_fstring_mapped_ftok_free(gpointer p)
  300. {
  301. rspamd_ftok_t *tok = p;
  302. rspamd_fstring_t *storage;
  303. storage = (rspamd_fstring_t *) (tok->begin - 2 * sizeof(gsize));
  304. rspamd_fstring_free(storage);
  305. g_free(tok);
  306. }
  307. rspamd_ftok_t *
  308. rspamd_ftok_map(const rspamd_fstring_t *s)
  309. {
  310. rspamd_ftok_t *tok;
  311. g_assert(s != NULL);
  312. tok = g_malloc(sizeof(*tok));
  313. tok->begin = s->str;
  314. tok->len = s->len;
  315. return tok;
  316. }
  317. char *
  318. rspamd_fstring_cstr(const rspamd_fstring_t *s)
  319. {
  320. char *result;
  321. if (s == NULL) {
  322. return NULL;
  323. }
  324. result = g_malloc(s->len + 1);
  325. memcpy(result, s->str, s->len);
  326. result[s->len] = '\0';
  327. return result;
  328. }
  329. char *
  330. rspamd_ftok_cstr(const rspamd_ftok_t *s)
  331. {
  332. char *result;
  333. if (s == NULL) {
  334. return NULL;
  335. }
  336. result = g_malloc(s->len + 1);
  337. memcpy(result, s->begin, s->len);
  338. result[s->len] = '\0';
  339. return result;
  340. }
  341. gboolean
  342. rspamd_ftok_cstr_equal(const rspamd_ftok_t *s, const gchar *pat,
  343. gboolean icase)
  344. {
  345. gsize slen;
  346. rspamd_ftok_t srch;
  347. g_assert(s != NULL);
  348. g_assert(pat != NULL);
  349. slen = strlen(pat);
  350. srch.begin = pat;
  351. srch.len = slen;
  352. if (icase) {
  353. return (rspamd_ftok_casecmp(s, &srch) == 0);
  354. }
  355. return (rspamd_ftok_cmp(s, &srch) == 0);
  356. }
  357. gchar *
  358. rspamd_ftokdup(const rspamd_ftok_t *src)
  359. {
  360. gchar *newstr;
  361. if (src == NULL) {
  362. return NULL;
  363. }
  364. newstr = g_malloc(src->len + 1);
  365. memcpy(newstr, src->begin, src->len);
  366. newstr[src->len] = '\0';
  367. return newstr;
  368. }
  369. gchar *
  370. rspamd_fstringdup(const rspamd_fstring_t *src)
  371. {
  372. gchar *newstr;
  373. if (src == NULL) {
  374. return NULL;
  375. }
  376. newstr = g_malloc(src->len + 1);
  377. memcpy(newstr, src->str, src->len);
  378. newstr[src->len] = '\0';
  379. return newstr;
  380. }