You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

http_router.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. /*-
  2. * Copyright 2019 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "http_router.h"
  17. #include "http_connection.h"
  18. #include "http_private.h"
  19. #include "libutil/regexp.h"
  20. #include "libutil/printf.h"
  21. #include "libserver/logger.h"
  22. #include "utlist.h"
  23. #include "unix-std.h"
  24. enum http_magic_type {
  25. HTTP_MAGIC_PLAIN = 0,
  26. HTTP_MAGIC_HTML,
  27. HTTP_MAGIC_CSS,
  28. HTTP_MAGIC_JS,
  29. HTTP_MAGIC_ICO,
  30. HTTP_MAGIC_PNG,
  31. HTTP_MAGIC_JPG
  32. };
  33. static const struct _rspamd_http_magic {
  34. const gchar *ext;
  35. const gchar *ct;
  36. } http_file_types[] = {
  37. [HTTP_MAGIC_PLAIN] = {"txt", "text/plain"},
  38. [HTTP_MAGIC_HTML] = {"html", "text/html"},
  39. [HTTP_MAGIC_CSS] = {"css", "text/css"},
  40. [HTTP_MAGIC_JS] = {"js", "application/javascript"},
  41. [HTTP_MAGIC_ICO] = {"ico", "image/x-icon"},
  42. [HTTP_MAGIC_PNG] = {"png", "image/png"},
  43. [HTTP_MAGIC_JPG] = {"jpg", "image/jpeg"},
  44. [HTTP_MAGIC_JPG] = {"svg", "image/svg+xml"},
  45. };
  46. /*
  47. * HTTP router functions
  48. */
  49. static void
  50. rspamd_http_entry_free(struct rspamd_http_connection_entry *entry)
  51. {
  52. if (entry != NULL) {
  53. close(entry->conn->fd);
  54. rspamd_http_connection_unref(entry->conn);
  55. if (entry->rt->finish_handler) {
  56. entry->rt->finish_handler(entry);
  57. }
  58. DL_DELETE(entry->rt->conns, entry);
  59. g_free(entry);
  60. }
  61. }
  62. static void
  63. rspamd_http_router_error_handler(struct rspamd_http_connection *conn,
  64. GError *err)
  65. {
  66. struct rspamd_http_connection_entry *entry = conn->ud;
  67. struct rspamd_http_message *msg;
  68. if (entry->is_reply) {
  69. /* At this point we need to finish this session and close owned socket */
  70. if (entry->rt->error_handler != NULL) {
  71. entry->rt->error_handler(entry, err);
  72. }
  73. rspamd_http_entry_free(entry);
  74. }
  75. else {
  76. /* Here we can write a reply to a client */
  77. if (entry->rt->error_handler != NULL) {
  78. entry->rt->error_handler(entry, err);
  79. }
  80. msg = rspamd_http_new_message(HTTP_RESPONSE);
  81. msg->date = time(NULL);
  82. msg->code = err->code;
  83. rspamd_http_message_set_body(msg, err->message, strlen(err->message));
  84. rspamd_http_connection_reset(entry->conn);
  85. rspamd_http_connection_write_message(entry->conn,
  86. msg,
  87. NULL,
  88. "text/plain",
  89. entry,
  90. entry->rt->timeout);
  91. entry->is_reply = TRUE;
  92. }
  93. }
  94. static const gchar *
  95. rspamd_http_router_detect_ct(const gchar *path)
  96. {
  97. const gchar *dot;
  98. guint i;
  99. dot = strrchr(path, '.');
  100. if (dot == NULL) {
  101. return http_file_types[HTTP_MAGIC_PLAIN].ct;
  102. }
  103. dot++;
  104. for (i = 0; i < G_N_ELEMENTS(http_file_types); i++) {
  105. if (strcmp(http_file_types[i].ext, dot) == 0) {
  106. return http_file_types[i].ct;
  107. }
  108. }
  109. return http_file_types[HTTP_MAGIC_PLAIN].ct;
  110. }
  111. static gboolean
  112. rspamd_http_router_is_subdir(const gchar *parent, const gchar *sub)
  113. {
  114. if (parent == NULL || sub == NULL || *parent == '\0') {
  115. return FALSE;
  116. }
  117. while (*parent != '\0') {
  118. if (*sub != *parent) {
  119. return FALSE;
  120. }
  121. parent++;
  122. sub++;
  123. }
  124. parent--;
  125. if (*parent == G_DIR_SEPARATOR) {
  126. return TRUE;
  127. }
  128. return (*sub == G_DIR_SEPARATOR || *sub == '\0');
  129. }
  130. static gboolean
  131. rspamd_http_router_try_file(struct rspamd_http_connection_entry *entry,
  132. rspamd_ftok_t *lookup, gboolean expand_path)
  133. {
  134. struct stat st;
  135. gint fd;
  136. gchar filebuf[PATH_MAX], realbuf[PATH_MAX], *dir;
  137. struct rspamd_http_message *reply_msg;
  138. rspamd_snprintf(filebuf, sizeof(filebuf), "%s%c%T",
  139. entry->rt->default_fs_path, G_DIR_SEPARATOR, lookup);
  140. if (realpath(filebuf, realbuf) == NULL ||
  141. lstat(realbuf, &st) == -1) {
  142. return FALSE;
  143. }
  144. if (S_ISDIR(st.st_mode) && expand_path) {
  145. /* Try to append 'index.html' to the url */
  146. rspamd_fstring_t *nlookup;
  147. rspamd_ftok_t tok;
  148. gboolean ret;
  149. nlookup = rspamd_fstring_sized_new(lookup->len + sizeof("index.html"));
  150. rspamd_printf_fstring(&nlookup, "%T%c%s", lookup, G_DIR_SEPARATOR,
  151. "index.html");
  152. tok.begin = nlookup->str;
  153. tok.len = nlookup->len;
  154. ret = rspamd_http_router_try_file(entry, &tok, FALSE);
  155. rspamd_fstring_free(nlookup);
  156. return ret;
  157. }
  158. else if (!S_ISREG(st.st_mode)) {
  159. return FALSE;
  160. }
  161. /* We also need to ensure that file is inside the defined dir */
  162. rspamd_strlcpy(filebuf, realbuf, sizeof(filebuf));
  163. dir = dirname(filebuf);
  164. if (dir == NULL ||
  165. !rspamd_http_router_is_subdir(entry->rt->default_fs_path,
  166. dir)) {
  167. return FALSE;
  168. }
  169. fd = open(realbuf, O_RDONLY);
  170. if (fd == -1) {
  171. return FALSE;
  172. }
  173. reply_msg = rspamd_http_new_message(HTTP_RESPONSE);
  174. reply_msg->date = time(NULL);
  175. reply_msg->code = 200;
  176. rspamd_http_router_insert_headers(entry->rt, reply_msg);
  177. if (!rspamd_http_message_set_body_from_fd(reply_msg, fd)) {
  178. rspamd_http_message_free(reply_msg);
  179. close(fd);
  180. return FALSE;
  181. }
  182. close(fd);
  183. rspamd_http_connection_reset(entry->conn);
  184. msg_debug("requested file %s", realbuf);
  185. rspamd_http_connection_write_message(entry->conn, reply_msg, NULL,
  186. rspamd_http_router_detect_ct(realbuf), entry,
  187. entry->rt->timeout);
  188. return TRUE;
  189. }
  190. static void
  191. rspamd_http_router_send_error(GError *err,
  192. struct rspamd_http_connection_entry *entry)
  193. {
  194. struct rspamd_http_message *err_msg;
  195. err_msg = rspamd_http_new_message(HTTP_RESPONSE);
  196. err_msg->date = time(NULL);
  197. err_msg->code = err->code;
  198. rspamd_http_message_set_body(err_msg, err->message,
  199. strlen(err->message));
  200. entry->is_reply = TRUE;
  201. err_msg->status = rspamd_fstring_new_init(err->message, strlen(err->message));
  202. rspamd_http_router_insert_headers(entry->rt, err_msg);
  203. rspamd_http_connection_reset(entry->conn);
  204. rspamd_http_connection_write_message(entry->conn,
  205. err_msg,
  206. NULL,
  207. "text/plain",
  208. entry,
  209. entry->rt->timeout);
  210. }
  211. static int
  212. rspamd_http_router_finish_handler(struct rspamd_http_connection *conn,
  213. struct rspamd_http_message *msg)
  214. {
  215. struct rspamd_http_connection_entry *entry = conn->ud;
  216. rspamd_http_router_handler_t handler = NULL;
  217. gpointer found;
  218. GError *err;
  219. rspamd_ftok_t lookup;
  220. const rspamd_ftok_t *encoding;
  221. struct http_parser_url u;
  222. guint i;
  223. rspamd_regexp_t *re;
  224. struct rspamd_http_connection_router *router;
  225. gchar *pathbuf = NULL;
  226. G_STATIC_ASSERT(sizeof(rspamd_http_router_handler_t) ==
  227. sizeof(gpointer));
  228. memset(&lookup, 0, sizeof(lookup));
  229. router = entry->rt;
  230. if (entry->is_reply) {
  231. /* Request is finished, it is safe to free a connection */
  232. rspamd_http_entry_free(entry);
  233. }
  234. else {
  235. if (G_UNLIKELY(msg->method != HTTP_GET && msg->method != HTTP_POST)) {
  236. if (router->unknown_method_handler) {
  237. return router->unknown_method_handler(entry, msg);
  238. }
  239. else {
  240. err = g_error_new(HTTP_ERROR, 500,
  241. "Invalid method");
  242. if (entry->rt->error_handler != NULL) {
  243. entry->rt->error_handler(entry, err);
  244. }
  245. rspamd_http_router_send_error(err, entry);
  246. g_error_free(err);
  247. return 0;
  248. }
  249. }
  250. /* Search for path */
  251. if (msg->url != NULL && msg->url->len != 0) {
  252. http_parser_parse_url(msg->url->str, msg->url->len, TRUE, &u);
  253. if (u.field_set & (1 << UF_PATH)) {
  254. gsize unnorm_len;
  255. pathbuf = g_malloc(u.field_data[UF_PATH].len);
  256. memcpy(pathbuf, msg->url->str + u.field_data[UF_PATH].off,
  257. u.field_data[UF_PATH].len);
  258. lookup.begin = pathbuf;
  259. lookup.len = u.field_data[UF_PATH].len;
  260. rspamd_normalize_path_inplace(pathbuf,
  261. lookup.len,
  262. &unnorm_len);
  263. lookup.len = unnorm_len;
  264. }
  265. else {
  266. lookup.begin = msg->url->str;
  267. lookup.len = msg->url->len;
  268. }
  269. found = g_hash_table_lookup(entry->rt->paths, &lookup);
  270. memcpy(&handler, &found, sizeof(found));
  271. msg_debug("requested known path: %T", &lookup);
  272. }
  273. else {
  274. err = g_error_new(HTTP_ERROR, 404,
  275. "Empty path requested");
  276. if (entry->rt->error_handler != NULL) {
  277. entry->rt->error_handler(entry, err);
  278. }
  279. rspamd_http_router_send_error(err, entry);
  280. g_error_free(err);
  281. return 0;
  282. }
  283. entry->is_reply = TRUE;
  284. encoding = rspamd_http_message_find_header(msg, "Accept-Encoding");
  285. if (encoding && rspamd_substring_search(encoding->begin, encoding->len,
  286. "gzip", 4) != -1) {
  287. entry->support_gzip = TRUE;
  288. }
  289. if (handler != NULL) {
  290. if (pathbuf) {
  291. g_free(pathbuf);
  292. }
  293. return handler(entry, msg);
  294. }
  295. else {
  296. /* Try regexps */
  297. for (i = 0; i < router->regexps->len; i++) {
  298. re = g_ptr_array_index(router->regexps, i);
  299. if (rspamd_regexp_match(re, lookup.begin, lookup.len,
  300. TRUE)) {
  301. found = rspamd_regexp_get_ud(re);
  302. memcpy(&handler, &found, sizeof(found));
  303. if (pathbuf) {
  304. g_free(pathbuf);
  305. }
  306. return handler(entry, msg);
  307. }
  308. }
  309. /* Now try plain file */
  310. if (entry->rt->default_fs_path == NULL || lookup.len == 0 ||
  311. !rspamd_http_router_try_file(entry, &lookup, TRUE)) {
  312. err = g_error_new(HTTP_ERROR, 404,
  313. "Not found");
  314. if (entry->rt->error_handler != NULL) {
  315. entry->rt->error_handler(entry, err);
  316. }
  317. msg_info("path: %T not found", &lookup);
  318. rspamd_http_router_send_error(err, entry);
  319. g_error_free(err);
  320. }
  321. }
  322. }
  323. if (pathbuf) {
  324. g_free(pathbuf);
  325. }
  326. return 0;
  327. }
  328. struct rspamd_http_connection_router *
  329. rspamd_http_router_new(rspamd_http_router_error_handler_t eh,
  330. rspamd_http_router_finish_handler_t fh,
  331. ev_tstamp timeout,
  332. const char *default_fs_path,
  333. struct rspamd_http_context *ctx)
  334. {
  335. struct rspamd_http_connection_router *nrouter;
  336. struct stat st;
  337. nrouter = g_malloc0(sizeof(struct rspamd_http_connection_router));
  338. nrouter->paths = g_hash_table_new_full(rspamd_ftok_icase_hash,
  339. rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, NULL);
  340. nrouter->regexps = g_ptr_array_new();
  341. nrouter->conns = NULL;
  342. nrouter->error_handler = eh;
  343. nrouter->finish_handler = fh;
  344. nrouter->response_headers = g_hash_table_new_full(rspamd_strcase_hash,
  345. rspamd_strcase_equal, g_free, g_free);
  346. nrouter->event_loop = ctx->event_loop;
  347. nrouter->timeout = timeout;
  348. nrouter->default_fs_path = NULL;
  349. if (default_fs_path != NULL) {
  350. if (stat(default_fs_path, &st) == -1) {
  351. msg_err("cannot stat %s", default_fs_path);
  352. }
  353. else {
  354. if (!S_ISDIR(st.st_mode)) {
  355. msg_err("path %s is not a directory", default_fs_path);
  356. }
  357. else {
  358. nrouter->default_fs_path = realpath(default_fs_path, NULL);
  359. }
  360. }
  361. }
  362. nrouter->ctx = ctx;
  363. return nrouter;
  364. }
  365. void rspamd_http_router_set_key(struct rspamd_http_connection_router *router,
  366. struct rspamd_cryptobox_keypair *key)
  367. {
  368. g_assert(key != NULL);
  369. router->key = rspamd_keypair_ref(key);
  370. }
  371. void rspamd_http_router_add_path(struct rspamd_http_connection_router *router,
  372. const gchar *path, rspamd_http_router_handler_t handler)
  373. {
  374. gpointer ptr;
  375. rspamd_ftok_t *key;
  376. rspamd_fstring_t *storage;
  377. G_STATIC_ASSERT(sizeof(rspamd_http_router_handler_t) ==
  378. sizeof(gpointer));
  379. if (path != NULL && handler != NULL && router != NULL) {
  380. memcpy(&ptr, &handler, sizeof(ptr));
  381. storage = rspamd_fstring_new_init(path, strlen(path));
  382. key = g_malloc0(sizeof(*key));
  383. key->begin = storage->str;
  384. key->len = storage->len;
  385. g_hash_table_insert(router->paths, key, ptr);
  386. }
  387. }
  388. void rspamd_http_router_set_unknown_handler(struct rspamd_http_connection_router *router,
  389. rspamd_http_router_handler_t handler)
  390. {
  391. if (router != NULL) {
  392. router->unknown_method_handler = handler;
  393. }
  394. }
  395. void rspamd_http_router_add_header(struct rspamd_http_connection_router *router,
  396. const gchar *name, const gchar *value)
  397. {
  398. if (name != NULL && value != NULL && router != NULL) {
  399. g_hash_table_replace(router->response_headers, g_strdup(name),
  400. g_strdup(value));
  401. }
  402. }
  403. void rspamd_http_router_insert_headers(struct rspamd_http_connection_router *router,
  404. struct rspamd_http_message *msg)
  405. {
  406. GHashTableIter it;
  407. gpointer k, v;
  408. if (router && msg) {
  409. g_hash_table_iter_init(&it, router->response_headers);
  410. while (g_hash_table_iter_next(&it, &k, &v)) {
  411. rspamd_http_message_add_header(msg, k, v);
  412. }
  413. }
  414. }
  415. void rspamd_http_router_add_regexp(struct rspamd_http_connection_router *router,
  416. struct rspamd_regexp_s *re, rspamd_http_router_handler_t handler)
  417. {
  418. gpointer ptr;
  419. G_STATIC_ASSERT(sizeof(rspamd_http_router_handler_t) ==
  420. sizeof(gpointer));
  421. if (re != NULL && handler != NULL && router != NULL) {
  422. memcpy(&ptr, &handler, sizeof(ptr));
  423. rspamd_regexp_set_ud(re, ptr);
  424. g_ptr_array_add(router->regexps, rspamd_regexp_ref(re));
  425. }
  426. }
  427. void rspamd_http_router_handle_socket(struct rspamd_http_connection_router *router,
  428. gint fd, gpointer ud)
  429. {
  430. struct rspamd_http_connection_entry *conn;
  431. conn = g_malloc0(sizeof(struct rspamd_http_connection_entry));
  432. conn->rt = router;
  433. conn->ud = ud;
  434. conn->is_reply = FALSE;
  435. conn->conn = rspamd_http_connection_new_server(router->ctx,
  436. fd,
  437. NULL,
  438. rspamd_http_router_error_handler,
  439. rspamd_http_router_finish_handler,
  440. 0);
  441. if (router->key) {
  442. rspamd_http_connection_set_key(conn->conn, router->key);
  443. }
  444. rspamd_http_connection_read_message(conn->conn, conn, router->timeout);
  445. DL_PREPEND(router->conns, conn);
  446. }
  447. void rspamd_http_router_free(struct rspamd_http_connection_router *router)
  448. {
  449. struct rspamd_http_connection_entry *conn, *tmp;
  450. rspamd_regexp_t *re;
  451. guint i;
  452. if (router) {
  453. DL_FOREACH_SAFE(router->conns, conn, tmp)
  454. {
  455. rspamd_http_entry_free(conn);
  456. }
  457. if (router->key) {
  458. rspamd_keypair_unref(router->key);
  459. }
  460. if (router->default_fs_path != NULL) {
  461. g_free(router->default_fs_path);
  462. }
  463. for (i = 0; i < router->regexps->len; i++) {
  464. re = g_ptr_array_index(router->regexps, i);
  465. rspamd_regexp_unref(re);
  466. }
  467. g_ptr_array_free(router->regexps, TRUE);
  468. g_hash_table_unref(router->paths);
  469. g_hash_table_unref(router->response_headers);
  470. g_free(router);
  471. }
  472. }