Browse Source

[Rework] Move compression routines outside of rspamd_util library

tags/3.0
Vsevolod Stakhov 3 years ago
parent
commit
2972ef051b
4 changed files with 383 additions and 316 deletions
  1. 2
    1
      src/lua/CMakeLists.txt
  2. 340
    0
      src/lua/lua_compress.c
  3. 35
    0
      src/lua/lua_compress.h
  4. 6
    315
      src/lua/lua_util.c

+ 2
- 1
src/lua/CMakeLists.txt View File

@@ -33,6 +33,7 @@ SET(LUASRC ${CMAKE_CURRENT_SOURCE_DIR}/lua_common.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_kann.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_spf.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_tensor.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_parsers.c)
${CMAKE_CURRENT_SOURCE_DIR}/lua_parsers.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_compress.c)

SET(RSPAMD_LUA ${LUASRC} PARENT_SCOPE)

+ 340
- 0
src/lua/lua_compress.c View File

@@ -0,0 +1,340 @@
/*-
* Copyright 2021 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "lua_common.h"
#include "unix-std.h"
#include "contrib/zstd/zstd.h"
#include <zlib.h>

/***
* @module rspamd_compress
* This module contains compression/decompression routines (zstd and zlib currently)
*/

/***
* @function zstd.compress_ctx()
* Creates new compression ctx
* @return {compress_ctx} new compress ctx
*/
LUA_FUNCTION_DEF (zstd, compress_ctx);

/***
* @function zstd.compress_ctx()
* Creates new compression ctx
* @return {compress_ctx} new compress ctx
*/
LUA_FUNCTION_DEF (zstd, decompress_ctx);


gint
lua_compress_zstd_compress (lua_State *L)
{
LUA_TRACE_POINT;
struct rspamd_lua_text *t = NULL, *res;
gsize sz, r;
gint comp_level = 1;

t = lua_check_text_or_string (L,1);

if (t == NULL || t->start == NULL) {
return luaL_error (L, "invalid arguments");
}

if (lua_type (L, 2) == LUA_TNUMBER) {
comp_level = lua_tointeger (L, 2);
}

sz = ZSTD_compressBound (t->len);

if (ZSTD_isError (sz)) {
msg_err ("cannot compress data: %s", ZSTD_getErrorName (sz));
lua_pushnil (L);

return 1;
}

res = lua_newuserdata (L, sizeof (*res));
res->start = g_malloc (sz);
res->flags = RSPAMD_TEXT_FLAG_OWN;
rspamd_lua_setclass (L, "rspamd{text}", -1);
r = ZSTD_compress ((void *)res->start, sz, t->start, t->len, comp_level);

if (ZSTD_isError (r)) {
msg_err ("cannot compress data: %s", ZSTD_getErrorName (r));
lua_pop (L, 1); /* Text will be freed here */
lua_pushnil (L);

return 1;
}

res->len = r;

return 1;
}

gint
lua_compress_zstd_decompress (lua_State *L)
{
LUA_TRACE_POINT;
struct rspamd_lua_text *t = NULL, *res;
gsize outlen, r;
ZSTD_DStream *zstream;
ZSTD_inBuffer zin;
ZSTD_outBuffer zout;
gchar *out;

t = lua_check_text_or_string (L,1);

if (t == NULL || t->start == NULL) {
return luaL_error (L, "invalid arguments");
}

zstream = ZSTD_createDStream ();
ZSTD_initDStream (zstream);

zin.pos = 0;
zin.src = t->start;
zin.size = t->len;

if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) {
outlen = ZSTD_DStreamOutSize ();
}

out = g_malloc (outlen);

zout.dst = out;
zout.pos = 0;
zout.size = outlen;

while (zin.pos < zin.size) {
r = ZSTD_decompressStream (zstream, &zout, &zin);

if (ZSTD_isError (r)) {
msg_err ("cannot decompress data: %s", ZSTD_getErrorName (r));
ZSTD_freeDStream (zstream);
g_free (out);
lua_pushstring (L, ZSTD_getErrorName (r));
lua_pushnil (L);

return 2;
}

if (zin.pos < zin.size && zout.pos == zout.size) {
/* We need to extend output buffer */
zout.size = zout.size * 2;
out = g_realloc (zout.dst, zout.size);
zout.dst = out;
}
}

ZSTD_freeDStream (zstream);
lua_pushnil (L); /* Error */
res = lua_newuserdata (L, sizeof (*res));
res->start = out;
res->flags = RSPAMD_TEXT_FLAG_OWN;
rspamd_lua_setclass (L, "rspamd{text}", -1);
res->len = zout.pos;

return 2;
}

gint
lua_compress_zlib_decompress (lua_State *L, bool is_gzip)
{
LUA_TRACE_POINT;
struct rspamd_lua_text *t = NULL, *res;
gsize sz;
z_stream strm;
gint rc;
guchar *p;
gsize remain;
gssize size_limit = -1;

int windowBits = is_gzip ? (MAX_WBITS + 16) : (MAX_WBITS);

t = lua_check_text_or_string (L,1);

if (t == NULL || t->start == NULL) {
return luaL_error (L, "invalid arguments");
}

if (lua_type (L, 2) == LUA_TNUMBER) {
size_limit = lua_tointeger (L, 2);
if (size_limit <= 0) {
return luaL_error (L, "invalid arguments (size_limit)");
}

sz = MIN (t->len * 2, size_limit);
}
else {
sz = t->len * 2;
}

memset (&strm, 0, sizeof (strm));
/* windowBits +16 to decode gzip, zlib 1.2.0.4+ */

/* Here are dragons to distinguish between raw deflate and zlib */
if (windowBits == MAX_WBITS && t->len > 0) {
if ((int)(unsigned char)((t->start[0] << 4)) != 0x80) {
/* Assume raw deflate */
windowBits = -windowBits;
}
}

rc = inflateInit2 (&strm, windowBits);

if (rc != Z_OK) {
return luaL_error (L, "cannot init zlib");
}

strm.avail_in = t->len;
strm.next_in = (guchar *)t->start;

res = lua_newuserdata (L, sizeof (*res));
res->start = g_malloc (sz);
res->flags = RSPAMD_TEXT_FLAG_OWN;
rspamd_lua_setclass (L, "rspamd{text}", -1);

p = (guchar *)res->start;
remain = sz;

while (strm.avail_in != 0) {
strm.avail_out = remain;
strm.next_out = p;

rc = inflate (&strm, Z_NO_FLUSH);

if (rc != Z_OK && rc != Z_BUF_ERROR) {
if (rc == Z_STREAM_END) {
break;
}
else {
msg_err ("cannot decompress data: %s (last error: %s)",
zError (rc), strm.msg);
lua_pop (L, 1); /* Text will be freed here */
lua_pushnil (L);
inflateEnd (&strm);

return 1;
}
}

res->len = strm.total_out;

if (strm.avail_out == 0 && strm.avail_in != 0) {

if (size_limit > 0 || res->len >= G_MAXUINT32 / 2) {
if (res->len > size_limit || res->len >= G_MAXUINT32 / 2) {
lua_pop (L, 1); /* Text will be freed here */
lua_pushnil (L);
inflateEnd (&strm);

return 1;
}
}

/* Need to allocate more */
remain = res->len;
res->start = g_realloc ((gpointer)res->start, res->len * 2);
sz = res->len * 2;
p = (guchar *)res->start + remain;
remain = sz - remain;
}
}

inflateEnd (&strm);
res->len = strm.total_out;

return 1;
}

gint
lua_compress_zlib_compress (lua_State *L)
{
LUA_TRACE_POINT;
struct rspamd_lua_text *t = NULL, *res;
gsize sz;
z_stream strm;
gint rc;
guchar *p;
gsize remain;

t = lua_check_text_or_string (L,1);

if (t == NULL || t->start == NULL) {
return luaL_error (L, "invalid arguments");
}


memset (&strm, 0, sizeof (strm));
rc = deflateInit2 (&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
MAX_WBITS + 16, MAX_MEM_LEVEL - 1, Z_DEFAULT_STRATEGY);

if (rc != Z_OK) {
return luaL_error (L, "cannot init zlib: %s", zError (rc));
}

sz = deflateBound (&strm, t->len);

strm.avail_in = t->len;
strm.next_in = (guchar *) t->start;

res = lua_newuserdata (L, sizeof (*res));
res->start = g_malloc (sz);
res->flags = RSPAMD_TEXT_FLAG_OWN;
rspamd_lua_setclass (L, "rspamd{text}", -1);

p = (guchar *) res->start;
remain = sz;

while (strm.avail_in != 0) {
strm.avail_out = remain;
strm.next_out = p;

rc = deflate (&strm, Z_FINISH);

if (rc != Z_OK && rc != Z_BUF_ERROR) {
if (rc == Z_STREAM_END) {
break;
}
else {
msg_err ("cannot compress data: %s (last error: %s)",
zError (rc), strm.msg);
lua_pop (L, 1); /* Text will be freed here */
lua_pushnil (L);
deflateEnd (&strm);

return 1;
}
}

res->len = strm.total_out;

if (strm.avail_out == 0 && strm.avail_in != 0) {
/* Need to allocate more */
remain = res->len;
res->start = g_realloc ((gpointer) res->start, strm.avail_in + sz);
sz = strm.avail_in + sz;
p = (guchar *) res->start + remain;
remain = sz - remain;
}
}

deflateEnd (&strm);
res->len = strm.total_out;

return 1;
}

+ 35
- 0
src/lua/lua_compress.h View File

@@ -0,0 +1,35 @@
/*-
* Copyright 2021 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef RSPAMD_LUA_COMPRESS_H
#define RSPAMD_LUA_COMPRESS_H

#include "lua_common.h"

#ifdef __cplusplus
extern "C" {
#endif

gint lua_compress_zstd_compress (lua_State *L);
gint lua_compress_zstd_decompress (lua_State *L);
gint lua_compress_zlib_compress (lua_State *L);
gint lua_compress_zlib_decompress (lua_State *L, bool is_gzip);

#ifdef __cplusplus
}
#endif

#endif //RSPAMD_LUA_COMPRESS_H

+ 6
- 315
src/lua/lua_util.c View File

@@ -15,7 +15,7 @@
*/
#include "lua_common.h"
#include "unix-std.h"
#include "contrib/zstd/zstd.h"
#include "lua_compress.h"
#include "libmime/email_addr.h"
#include "libmime/content_type.h"
#include "libmime/mime_headers.h"
@@ -29,7 +29,6 @@

#include <math.h>
#include <glob.h>
#include <zlib.h>

#include "unicode/uspoof.h"
#include "unicode/uscript.h"
@@ -1954,339 +1953,31 @@ lua_util_random_hex (lua_State *L)
static gint
lua_util_zstd_compress (lua_State *L)
{
LUA_TRACE_POINT;
struct rspamd_lua_text *t = NULL, *res, tmp;
gsize sz, r;
gint comp_level = 1;

if (lua_type (L, 1) == LUA_TSTRING) {
t = &tmp;
t->start = lua_tolstring (L, 1, &sz);
t->len = sz;
}
else {
t = lua_check_text (L, 1);
}

if (t == NULL || t->start == NULL) {
return luaL_error (L, "invalid arguments");
}

if (lua_type (L, 2) == LUA_TNUMBER) {
comp_level = lua_tointeger (L, 2);
}

sz = ZSTD_compressBound (t->len);

if (ZSTD_isError (sz)) {
msg_err ("cannot compress data: %s", ZSTD_getErrorName (sz));
lua_pushnil (L);

return 1;
}

res = lua_newuserdata (L, sizeof (*res));
res->start = g_malloc (sz);
res->flags = RSPAMD_TEXT_FLAG_OWN;
rspamd_lua_setclass (L, "rspamd{text}", -1);
r = ZSTD_compress ((void *)res->start, sz, t->start, t->len, comp_level);

if (ZSTD_isError (r)) {
msg_err ("cannot compress data: %s", ZSTD_getErrorName (r));
lua_pop (L, 1); /* Text will be freed here */
lua_pushnil (L);

return 1;
}

res->len = r;

return 1;
return lua_compress_zstd_compress (L);
}

static gint
lua_util_zstd_decompress (lua_State *L)
{
LUA_TRACE_POINT;
struct rspamd_lua_text *t = NULL, *res;
gsize outlen, sz, r;
ZSTD_DStream *zstream;
ZSTD_inBuffer zin;
ZSTD_outBuffer zout;
gchar *out;

if (lua_type (L, 1) == LUA_TSTRING) {
t = g_alloca (sizeof (*t));
t->start = lua_tolstring (L, 1, &sz);
t->len = sz;
}
else {
t = lua_check_text (L, 1);
}

if (t == NULL || t->start == NULL) {
return luaL_error (L, "invalid arguments");
}

zstream = ZSTD_createDStream ();
ZSTD_initDStream (zstream);

zin.pos = 0;
zin.src = t->start;
zin.size = t->len;

if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) {
outlen = ZSTD_DStreamOutSize ();
}

out = g_malloc (outlen);

zout.dst = out;
zout.pos = 0;
zout.size = outlen;

while (zin.pos < zin.size) {
r = ZSTD_decompressStream (zstream, &zout, &zin);

if (ZSTD_isError (r)) {
msg_err ("cannot decompress data: %s", ZSTD_getErrorName (r));
ZSTD_freeDStream (zstream);
g_free (out);
lua_pushstring (L, ZSTD_getErrorName (r));
lua_pushnil (L);

return 2;
}

if (zin.pos < zin.size && zout.pos == zout.size) {
/* We need to extend output buffer */
zout.size = zout.size * 2;
out = g_realloc (zout.dst, zout.size);
zout.dst = out;
}
}

ZSTD_freeDStream (zstream);
lua_pushnil (L); /* Error */
res = lua_newuserdata (L, sizeof (*res));
res->start = out;
res->flags = RSPAMD_TEXT_FLAG_OWN;
rspamd_lua_setclass (L, "rspamd{text}", -1);
res->len = zout.pos;

return 2;
return lua_compress_zstd_decompress (L);
}

static gint
lua_util_gzip_compress (lua_State *L)
{
LUA_TRACE_POINT;
struct rspamd_lua_text *t = NULL, *res, tmp;
gsize sz;
z_stream strm;
gint rc;
guchar *p;
gsize remain;

if (lua_type (L, 1) == LUA_TSTRING) {
t = &tmp;
t->start = lua_tolstring (L, 1, &sz);
t->len = sz;
}
else {
t = lua_check_text (L, 1);
}

if (t == NULL || t->start == NULL) {
return luaL_error (L, "invalid arguments");
}


memset (&strm, 0, sizeof (strm));
rc = deflateInit2 (&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
MAX_WBITS + 16, MAX_MEM_LEVEL - 1, Z_DEFAULT_STRATEGY);

if (rc != Z_OK) {
return luaL_error (L, "cannot init zlib: %s", zError (rc));
}

sz = deflateBound (&strm, t->len);

strm.avail_in = t->len;
strm.next_in = (guchar *) t->start;

res = lua_newuserdata (L, sizeof (*res));
res->start = g_malloc (sz);
res->flags = RSPAMD_TEXT_FLAG_OWN;
rspamd_lua_setclass (L, "rspamd{text}", -1);

p = (guchar *) res->start;
remain = sz;

while (strm.avail_in != 0) {
strm.avail_out = remain;
strm.next_out = p;

rc = deflate (&strm, Z_FINISH);

if (rc != Z_OK && rc != Z_BUF_ERROR) {
if (rc == Z_STREAM_END) {
break;
}
else {
msg_err ("cannot compress data: %s (last error: %s)",
zError (rc), strm.msg);
lua_pop (L, 1); /* Text will be freed here */
lua_pushnil (L);
deflateEnd (&strm);

return 1;
}
}

res->len = strm.total_out;

if (strm.avail_out == 0 && strm.avail_in != 0) {
/* Need to allocate more */
remain = res->len;
res->start = g_realloc ((gpointer) res->start, strm.avail_in + sz);
sz = strm.avail_in + sz;
p = (guchar *) res->start + remain;
remain = sz - remain;
}
}

deflateEnd (&strm);
res->len = strm.total_out;

return 1;
return lua_compress_zlib_compress (L);
}


static gint
lua_util_zlib_inflate (lua_State *L, int windowBits)
{
LUA_TRACE_POINT;
struct rspamd_lua_text *t = NULL, *res, tmp;
gsize sz;
z_stream strm;
gint rc;
guchar *p;
gsize remain;
gssize size_limit = -1;

if (lua_type (L, 1) == LUA_TSTRING) {
t = &tmp;
t->start = lua_tolstring (L, 1, &sz);
t->len = sz;
}
else {
t = lua_check_text (L, 1);
}

if (t == NULL || t->start == NULL) {
return luaL_error (L, "invalid arguments");
}

if (lua_type (L, 2) == LUA_TNUMBER) {
size_limit = lua_tointeger (L, 2);
if (size_limit <= 0) {
return luaL_error (L, "invalid arguments (size_limit)");
}

sz = MIN (t->len * 2, size_limit);
}
else {
sz = t->len * 2;
}

memset (&strm, 0, sizeof (strm));
/* windowBits +16 to decode gzip, zlib 1.2.0.4+ */

/* Here are dragons to distinguish between raw deflate and zlib */
if (windowBits == MAX_WBITS && t->len > 0) {
if ((int)(unsigned char)((t->start[0] << 4)) != 0x80) {
/* Assume raw deflate */
windowBits = -windowBits;
}
}

rc = inflateInit2 (&strm, windowBits);

if (rc != Z_OK) {
return luaL_error (L, "cannot init zlib");
}

strm.avail_in = t->len;
strm.next_in = (guchar *)t->start;

res = lua_newuserdata (L, sizeof (*res));
res->start = g_malloc (sz);
res->flags = RSPAMD_TEXT_FLAG_OWN;
rspamd_lua_setclass (L, "rspamd{text}", -1);

p = (guchar *)res->start;
remain = sz;

while (strm.avail_in != 0) {
strm.avail_out = remain;
strm.next_out = p;

rc = inflate (&strm, Z_NO_FLUSH);

if (rc != Z_OK && rc != Z_BUF_ERROR) {
if (rc == Z_STREAM_END) {
break;
}
else {
msg_err ("cannot decompress data: %s (last error: %s)",
zError (rc), strm.msg);
lua_pop (L, 1); /* Text will be freed here */
lua_pushnil (L);
inflateEnd (&strm);

return 1;
}
}

res->len = strm.total_out;

if (strm.avail_out == 0 && strm.avail_in != 0) {

if (size_limit > 0 || res->len >= G_MAXUINT32 / 2) {
if (res->len > size_limit || res->len >= G_MAXUINT32 / 2) {
lua_pop (L, 1); /* Text will be freed here */
lua_pushnil (L);
inflateEnd (&strm);

return 1;
}
}

/* Need to allocate more */
remain = res->len;
res->start = g_realloc ((gpointer)res->start, res->len * 2);
sz = res->len * 2;
p = (guchar *)res->start + remain;
remain = sz - remain;
}
}

inflateEnd (&strm);
res->len = strm.total_out;

return 1;
}
static gint
lua_util_gzip_decompress (lua_State *L)
{
return lua_util_zlib_inflate (L, MAX_WBITS + 16);
return lua_compress_zlib_decompress (L, true);
}

static gint
lua_util_inflate (lua_State *L)
{
return lua_util_zlib_inflate (L, MAX_WBITS);
return lua_compress_zlib_decompress (L, false);
}

static gint

Loading…
Cancel
Save