diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-03-05 22:47:28 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-03-05 22:47:28 +0300 |
commit | c9af649b3316857e77ec3509ca93c4a4ff37f477 (patch) | |
tree | 5b997f78a49366d3bb07f32cae29ed6b4dd25cf4 /src/cdb/cdb_make.c | |
parent | 5b4b47c6c4d81d0ca082617293d7284396998e0d (diff) | |
download | rspamd-c9af649b3316857e77ec3509ca93c4a4ff37f477.tar.gz rspamd-c9af649b3316857e77ec3509ca93c4a4ff37f477.zip |
* Add ability to lookup CDB maps from lua
* Add cdb:// map to multimap plugin
Diffstat (limited to 'src/cdb/cdb_make.c')
-rw-r--r-- | src/cdb/cdb_make.c | 524 |
1 files changed, 524 insertions, 0 deletions
diff --git a/src/cdb/cdb_make.c b/src/cdb/cdb_make.c new file mode 100644 index 000000000..b5ce28eeb --- /dev/null +++ b/src/cdb/cdb_make.c @@ -0,0 +1,524 @@ +/* $Id: cdb_init.c,v 1.12 2008-11-06 18:07:04 mjt Exp $ + * cdb_init, cdb_free and cdb_read routines + * + * This file is a part of tinycdb package by Michael Tokarev, mjt@corpit.ru. + * Public domain. + */ + +#include "cdb.h" + +void cdb_pack(unsigned num, unsigned char buf[4]) +{ + buf[0] = num & 255; + num >>= 8; + buf[1] = num & 255; + num >>= 8; + buf[2] = num & 255; + buf[3] = num >> 8; +} + +int cdb_make_start(struct cdb_make *cdbmp, int fd) +{ + memset (cdbmp, 0, sizeof(*cdbmp)); + cdbmp->cdb_fd = fd; + cdbmp->cdb_dpos = 2048; + cdbmp->cdb_bpos = cdbmp->cdb_buf + 2048; + return 0; +} + +int +_cdb_make_fullwrite(int fd, const unsigned char *buf, unsigned len) +{ + while (len) { + int l = write (fd, buf, len); + if (l > 0) { + len -= l; + buf += l; + } + else if (l < 0 && errno != EINTR) + return -1; + } + return 0; +} + +int +_cdb_make_flush(struct cdb_make *cdbmp) +{ + unsigned len = cdbmp->cdb_bpos - cdbmp->cdb_buf; + if (len) { + if (_cdb_make_fullwrite (cdbmp->cdb_fd, cdbmp->cdb_buf, len) < 0) + return -1; + cdbmp->cdb_bpos = cdbmp->cdb_buf; + } + return 0; +} + +int +_cdb_make_write(struct cdb_make *cdbmp, const unsigned char *ptr, unsigned len) +{ + unsigned l = sizeof(cdbmp->cdb_buf) - (cdbmp->cdb_bpos - cdbmp->cdb_buf); + cdbmp->cdb_dpos += len; + if (len > l) { + memcpy (cdbmp->cdb_bpos, ptr, l); + cdbmp->cdb_bpos += l; + if (_cdb_make_flush (cdbmp) < 0) + return -1; + ptr += l; + len -= l; + l = len / sizeof(cdbmp->cdb_buf); + if (l) { + l *= sizeof(cdbmp->cdb_buf); + if (_cdb_make_fullwrite (cdbmp->cdb_fd, ptr, l) < 0) + return -1; + ptr += l; + len -= l; + } + } + if (len) { + memcpy (cdbmp->cdb_bpos, ptr, len); + cdbmp->cdb_bpos += len; + } + return 0; +} + +static int cdb_make_finish_internal(struct cdb_make *cdbmp) +{ + unsigned hcnt[256]; /* hash table counts */ + unsigned hpos[256]; /* hash table positions */ + struct cdb_rec *htab; + unsigned char *p; + struct cdb_rl *rl; + unsigned hsize; + unsigned t, i; + + if (((0xffffffff - cdbmp->cdb_dpos) >> 3) < cdbmp->cdb_rcnt) + return errno = ENOMEM, -1; + + /* count htab sizes and reorder reclists */ + hsize = 0; + for (t = 0; t < 256; ++t) { + struct cdb_rl *rlt = NULL; + i = 0; + rl = cdbmp->cdb_rec[t]; + while (rl) { + struct cdb_rl *rln = rl->next; + rl->next = rlt; + rlt = rl; + i += rl->cnt; + rl = rln; + } + cdbmp->cdb_rec[t] = rlt; + if (hsize < (hcnt[t] = i << 1)) + hsize = hcnt[t]; + } + + /* allocate memory to hold max htable */ + htab = (struct cdb_rec*) malloc ((hsize + 2) * sizeof(struct cdb_rec)); + if (!htab) + return errno = ENOENT, -1; + p = (unsigned char *) htab; + htab += 2; + + /* build hash tables */ + for (t = 0; t < 256; ++t) { + unsigned len, hi; + hpos[t] = cdbmp->cdb_dpos; + if ((len = hcnt[t]) == 0) + continue; + for (i = 0; i < len; ++i) + htab[i].hval = htab[i].rpos = 0; + for (rl = cdbmp->cdb_rec[t]; rl; rl = rl->next) + for (i = 0; i < rl->cnt; ++i) { + hi = (rl->rec[i].hval >> 8) % len; + while (htab[hi].rpos) + if (++hi == len) + hi = 0; + htab[hi] = rl->rec[i]; + } + for (i = 0; i < len; ++i) { + cdb_pack (htab[i].hval, p + (i << 3)); + cdb_pack (htab[i].rpos, p + (i << 3) + 4); + } + if (_cdb_make_write (cdbmp, p, len << 3) < 0) { + free (p); + return -1; + } + } + free (p); + if (_cdb_make_flush (cdbmp) < 0) + return -1; + p = cdbmp->cdb_buf; + for (t = 0; t < 256; ++t) { + cdb_pack (hpos[t], p + (t << 3)); + cdb_pack (hcnt[t], p + (t << 3) + 4); + } + if (lseek (cdbmp->cdb_fd, 0, 0) != 0 || _cdb_make_fullwrite (cdbmp->cdb_fd, + p, 2048) != 0) + return -1; + + return 0; +} + +static void cdb_make_free(struct cdb_make *cdbmp) +{ + unsigned t; + for (t = 0; t < 256; ++t) { + struct cdb_rl *rl = cdbmp->cdb_rec[t]; + while (rl) { + struct cdb_rl *tm = rl; + rl = rl->next; + free (tm); + } + } +} + +int cdb_make_finish(struct cdb_make *cdbmp) +{ + int r = cdb_make_finish_internal (cdbmp); + cdb_make_free (cdbmp); + return r; +} + +int +_cdb_make_add(struct cdb_make *cdbmp, unsigned hval, const void *key, + unsigned klen, const void *val, unsigned vlen) +{ + unsigned char rlen[8]; + struct cdb_rl *rl; + unsigned i; + if (klen > 0xffffffff - (cdbmp->cdb_dpos + 8) || vlen > 0xffffffff + - (cdbmp->cdb_dpos + klen + 8)) + return errno = ENOMEM, -1; + i = hval & 255; + rl = cdbmp->cdb_rec[i]; + if (!rl || rl->cnt >= sizeof(rl->rec) / sizeof(rl->rec[0])) { + rl = (struct cdb_rl*) malloc (sizeof(struct cdb_rl)); + if (!rl) + return errno = ENOMEM, -1; + rl->cnt = 0; + rl->next = cdbmp->cdb_rec[i]; + cdbmp->cdb_rec[i] = rl; + } + i = rl->cnt++; + rl->rec[i].hval = hval; + rl->rec[i].rpos = cdbmp->cdb_dpos; + ++cdbmp->cdb_rcnt; + cdb_pack (klen, rlen); + cdb_pack (vlen, rlen + 4); + if (_cdb_make_write (cdbmp, rlen, 8) < 0 || _cdb_make_write (cdbmp, key, + klen) < 0 || _cdb_make_write (cdbmp, val, vlen) < 0) + return -1; + return 0; +} + +int cdb_make_add(struct cdb_make *cdbmp, const void *key, unsigned klen, + const void *val, unsigned vlen) +{ + return _cdb_make_add (cdbmp, cdb_hash (key, klen), key, klen, val, vlen); +} + +static void fixup_rpos(struct cdb_make *cdbmp, unsigned rpos, unsigned rlen) +{ + unsigned i; + struct cdb_rl *rl; + register struct cdb_rec *rp, *rs; + for (i = 0; i < 256; ++i) { + for (rl = cdbmp->cdb_rec[i]; rl; rl = rl->next) + for (rs = rl->rec, rp = rs + rl->cnt; --rp >= rs;) + if (rp->rpos <= rpos) + goto nexthash; + else + rp->rpos -= rlen; + nexthash: ; + } +} + +static int remove_record(struct cdb_make *cdbmp, unsigned rpos, unsigned rlen) +{ + unsigned pos, len; + int r, fd; + + len = cdbmp->cdb_dpos - rpos - rlen; + cdbmp->cdb_dpos -= rlen; + if (!len) + return 0; /* it was the last record, nothing to do */ + pos = rpos; + fd = cdbmp->cdb_fd; + do { + r = len > sizeof(cdbmp->cdb_buf) ? sizeof(cdbmp->cdb_buf) : len; + if (lseek (fd, pos + rlen, SEEK_SET) < 0 || (r = read (fd, + cdbmp->cdb_buf, r)) <= 0) + return -1; + if (lseek (fd, pos, SEEK_SET) < 0 || _cdb_make_fullwrite (fd, + cdbmp->cdb_buf, r) < 0) + return -1; + pos += r; + len -= r; + } while (len); + g_assert (cdbmp->cdb_dpos == pos); + fixup_rpos (cdbmp, rpos, rlen); + return 0; +} + +static int zerofill_record(struct cdb_make *cdbmp, unsigned rpos, unsigned rlen) +{ + if (rpos + rlen == cdbmp->cdb_dpos) { + cdbmp->cdb_dpos = rpos; + return 0; + } + if (lseek (cdbmp->cdb_fd, rpos, SEEK_SET) < 0) + return -1; + memset (cdbmp->cdb_buf, 0, sizeof(cdbmp->cdb_buf)); + cdb_pack (rlen - 8, cdbmp->cdb_buf + 4); + for (;;) { + rpos = rlen > sizeof(cdbmp->cdb_buf) ? sizeof(cdbmp->cdb_buf) : rlen; + if (_cdb_make_fullwrite (cdbmp->cdb_fd, cdbmp->cdb_buf, rpos) < 0) + return -1; + rlen -= rpos; + if (!rlen) + return 0; + memset (cdbmp->cdb_buf + 4, 0, 4); + } +} + +/* return: 0 = not found, 1 = error, or record length */ +static unsigned match(struct cdb_make *cdbmp, unsigned pos, const char *key, + unsigned klen) +{ + int len; + unsigned rlen; + if (lseek (cdbmp->cdb_fd, pos, SEEK_SET) < 0) + return 1; + if (read (cdbmp->cdb_fd, cdbmp->cdb_buf, 8) != 8) + return 1; + if (cdb_unpack (cdbmp->cdb_buf) != klen) + return 0; + + /* record length; check its validity */ + rlen = cdb_unpack (cdbmp->cdb_buf + 4); + if (rlen > cdbmp->cdb_dpos - pos - klen - 8) + return errno = EPROTO, 1; /* someone changed our file? */ + rlen += klen + 8; + + while (klen) { + len = klen > sizeof(cdbmp->cdb_buf) ? sizeof(cdbmp->cdb_buf) : klen; + len = read (cdbmp->cdb_fd, cdbmp->cdb_buf, len); + if (len <= 0) + return 1; + if (memcmp (cdbmp->cdb_buf, key, len) != 0) + return 0; + key += len; + klen -= len; + } + + return rlen; +} + +static int findrec(struct cdb_make *cdbmp, const void *key, unsigned klen, + unsigned hval, enum cdb_put_mode mode) +{ + struct cdb_rl *rl; + struct cdb_rec *rp, *rs; + unsigned r; + int seeked = 0; + int ret = 0; + for (rl = cdbmp->cdb_rec[hval & 255]; rl; rl = rl->next) + for (rs = rl->rec, rp = rs + rl->cnt; --rp >= rs;) { + if (rp->hval != hval) + continue; + /*XXX this explicit flush may be unnecessary having + * smarter match() that looks into cdb_buf too, but + * most of a time here spent in finding hash values + * (above), not keys */ + if (!seeked && _cdb_make_flush (cdbmp) < 0) + return -1; + seeked = 1; + r = match (cdbmp, rp->rpos, key, klen); + if (!r) + continue; + if (r == 1) + return -1; + ret = 1; + switch (mode) + { + case CDB_FIND_REMOVE: + if (remove_record (cdbmp, rp->rpos, r) < 0) + return -1; + break; + case CDB_FIND_FILL0: + if (zerofill_record (cdbmp, rp->rpos, r) < 0) + return -1; + break; + default: + goto finish; + } + memmove (rp, rp + 1, (rs + rl->cnt - 1 - rp) * sizeof(*rp)); + --rl->cnt; + --cdbmp->cdb_rcnt; + } + finish: if (seeked && lseek (cdbmp->cdb_fd, cdbmp->cdb_dpos, SEEK_SET) < 0) + return -1; + return ret; +} + +int cdb_make_find(struct cdb_make *cdbmp, const void *key, unsigned klen, + enum cdb_put_mode mode) +{ + return findrec (cdbmp, key, klen, cdb_hash (key, klen), mode); +} + +int cdb_make_exists(struct cdb_make *cdbmp, const void *key, unsigned klen) +{ + return cdb_make_find (cdbmp, key, klen, CDB_FIND); +} + +int cdb_make_put(struct cdb_make *cdbmp, const void *key, unsigned klen, + const void *val, unsigned vlen, enum cdb_put_mode mode) +{ + unsigned hval = cdb_hash (key, klen); + int r; + + switch (mode) + { + case CDB_PUT_REPLACE: + case CDB_PUT_INSERT: + case CDB_PUT_WARN: + case CDB_PUT_REPLACE0: + r = findrec (cdbmp, key, klen, hval, mode); + if (r < 0) + return -1; + if (r && mode == CDB_PUT_INSERT) + return errno = EEXIST, 1; + break; + + case CDB_PUT_ADD: + r = 0; + break; + + default: + return errno = EINVAL, -1; + } + + if (_cdb_make_add (cdbmp, hval, key, klen, val, vlen) < 0) + return -1; + + return r; +} + +unsigned +cdb_unpack(const unsigned char buf[4]) +{ + unsigned n = buf[3]; + n <<= 8; n |= buf[2]; + n <<= 8; n |= buf[1]; + n <<= 8; n |= buf[0]; + return n; +} + +int +cdb_seqnext(unsigned *cptr, struct cdb *cdbp) { + unsigned klen, vlen; + unsigned pos = *cptr; + unsigned dend = cdbp->cdb_dend; + const unsigned char *mem = cdbp->cdb_mem; + if (pos > dend - 8) + return 0; + klen = cdb_unpack(mem + pos); + vlen = cdb_unpack(mem + pos + 4); + pos += 8; + if (dend - klen < pos || dend - vlen < pos + klen) + return errno = EPROTO, -1; + cdbp->cdb_kpos = pos; + cdbp->cdb_klen = klen; + cdbp->cdb_vpos = pos + klen; + cdbp->cdb_vlen = vlen; + *cptr = pos + klen + vlen; + return 1; +} + +/* read a chunk from file, ignoring interrupts (EINTR) */ + +int +cdb_bread(int fd, void *buf, int len) +{ + int l; + while(len > 0) { + do l = read(fd, buf, len); + while(l < 0 && errno == EINTR); + if (l <= 0) { + if (!l) + errno = EIO; + return -1; + } + buf = (char*)buf + l; + len -= l; + } + return 0; +} + +/* find a given key in cdb file, seek a file pointer to it's value and + place data length to *dlenp. */ + +int +cdb_seek(int fd, const void *key, unsigned klen, unsigned *dlenp) +{ + unsigned htstart; /* hash table start position */ + unsigned htsize; /* number of elements in a hash table */ + unsigned httodo; /* hash table elements left to look */ + unsigned hti; /* hash table index */ + unsigned pos; /* position in a file */ + unsigned hval; /* key's hash value */ + unsigned char rbuf[64]; /* read buffer */ + int needseek = 1; /* if we should seek to a hash slot */ + + hval = cdb_hash(key, klen); + pos = (hval & 0xff) << 3; /* position in TOC */ + /* read the hash table parameters */ + if (lseek(fd, pos, SEEK_SET) < 0 || cdb_bread(fd, rbuf, 8) < 0) + return -1; + if ((htsize = cdb_unpack(rbuf + 4)) == 0) + return 0; + hti = (hval >> 8) % htsize; /* start position in hash table */ + httodo = htsize; + htstart = cdb_unpack(rbuf); + + for(;;) { + if (needseek && lseek(fd, htstart + (hti << 3), SEEK_SET) < 0) + return -1; + if (cdb_bread(fd, rbuf, 8) < 0) + return -1; + if ((pos = cdb_unpack(rbuf + 4)) == 0) /* not found */ + return 0; + + if (cdb_unpack(rbuf) != hval) /* hash value not matched */ + needseek = 0; + else { /* hash value matched */ + if (lseek(fd, pos, SEEK_SET) < 0 || cdb_bread(fd, rbuf, 8) < 0) + return -1; + if (cdb_unpack(rbuf) == klen) { /* key length matches */ + /* read the key from file and compare with wanted */ + unsigned l = klen, c; + const char *k = (const char*)key; + if (dlenp) + *dlenp = cdb_unpack(rbuf + 4); /* save value length */ + for(;;) { + if (!l) /* the whole key read and matches, return */ + return 1; + c = l > sizeof(rbuf) ? sizeof(rbuf) : l; + if (cdb_bread(fd, rbuf, c) < 0) + return -1; + if (memcmp(rbuf, k, c) != 0) /* no, it differs, stop here */ + break; + k += c; l -= c; + } + } + needseek = 1; /* we're looked to other place, should seek back */ + } + if (!--httodo) + return 0; + if (++hti == htsize) { + hti = 0; + needseek = 1; + } + } +} |