123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- /*-
- * Copyright 2021 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #ifndef RSPAMD_HTML_H
- #define RSPAMD_HTML_H
-
- #include "config.h"
- #include "libutil/mem_pool.h"
- #include "libserver/url.h"
-
- #ifdef __cplusplus
- extern "C" {
- #endif
-
- /*
- * HTML content flags
- */
- #define RSPAMD_HTML_FLAG_BAD_START (1 << 0)
- #define RSPAMD_HTML_FLAG_BAD_ELEMENTS (1 << 1)
- #define RSPAMD_HTML_FLAG_XML (1 << 2)
- #define RSPAMD_HTML_FLAG_UNBALANCED (1 << 3)
- #define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
- #define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
- #define RSPAMD_HTML_FLAG_TOO_MANY_TAGS (1 << 6)
- #define RSPAMD_HTML_FLAG_HAS_DATA_URLS (1 << 7)
- #define RSPAMD_HTML_FLAG_HAS_ZEROS (1 << 8)
-
- /*
- * Image flags
- */
- #define RSPAMD_HTML_FLAG_IMAGE_EMBEDDED (1 << 0)
- #define RSPAMD_HTML_FLAG_IMAGE_EXTERNAL (1 << 1)
- #define RSPAMD_HTML_FLAG_IMAGE_DATA (1 << 2)
-
-
- struct rspamd_image;
-
- struct html_image {
- unsigned int height;
- unsigned int width;
- unsigned int flags;
- char *src;
- struct rspamd_url *url;
- struct rspamd_image *embedded_image;
- void *tag;
- };
-
-
- /* Forwarded declaration */
- struct rspamd_task;
-
- /*
- * Decode HTML entitles in text. Text is modified in place.
- */
- unsigned int rspamd_html_decode_entitles_inplace(char *s, gsize len);
-
- void *rspamd_html_process_part(rspamd_mempool_t *pool,
- GByteArray *in);
-
- void *rspamd_html_process_part_full(struct rspamd_task *task,
- GByteArray *in, GList **exceptions,
- khash_t(rspamd_url_hash) * url_set,
- GPtrArray *part_urls,
- bool allow_css,
- uint16_t *cur_url_order);
-
- /*
- * Returns true if a specified tag has been seen in a part
- */
- gboolean rspamd_html_tag_seen(void *ptr, const char *tagname);
-
- /**
- * Returns name for the specified tag id
- * @param id
- * @return
- */
- const char *rspamd_html_tag_by_id(int id);
-
- /**
- * Returns HTML tag id by name
- * @param name
- * @return
- */
- int rspamd_html_tag_by_name(const char *name);
-
- /**
- * Gets a name for a tag
- * @param tag
- * @param len
- * @return
- */
- const char *rspamd_html_tag_name(void *tag, gsize *len);
-
- /**
- * Find HTML image by content id
- * @param html_content
- * @param cid
- * @param cid_len
- * @return
- */
- struct html_image *rspamd_html_find_embedded_image(void *html_content,
- const char *cid, gsize cid_len);
-
- /**
- * Stores parsed content in ftok_t structure
- * @param html_content
- * @param dest
- * @return
- */
- bool rspamd_html_get_parsed_content(void *html_content, rspamd_ftok_t *dest);
-
- /**
- * Returns number of tags in the html content
- * @param html_content
- * @return
- */
- gsize rspamd_html_get_tags_count(void *html_content);
-
-
- #ifdef __cplusplus
- }
- #endif
-
- #endif
|