You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

message.c 47KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "util.h"
  18. #include "rspamd.h"
  19. #include "message.h"
  20. #include "cfg_file.h"
  21. #include "libutil/regexp.h"
  22. #include "html.h"
  23. #include "images.h"
  24. #include "utlist.h"
  25. #include "tokenizers/tokenizers.h"
  26. #ifdef WITH_SNOWBALL
  27. #include "libstemmer.h"
  28. #endif
  29. #include "acism.h"
  30. #include <iconv.h>
  31. #define RECURSION_LIMIT 5
  32. #define UTF8_CHARSET "UTF-8"
  33. #define GTUBE_SYMBOL "GTUBE"
  34. #define SET_PART_RAW(part) ((part)->flags &= ~RSPAMD_MIME_PART_FLAG_UTF)
  35. #define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_PART_FLAG_UTF)
  36. static ac_trie_t *gtube_trie = NULL;
  37. static const gchar gtube_pattern[] = "XJS*C4JDBQADN1.NSBN3*2IDNEN*"
  38. "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X";
  39. static rspamd_regexp_t *utf_compatible_re = NULL;
  40. static GQuark
  41. rspamd_message_quark (void)
  42. {
  43. return g_quark_from_static_string ("mime-error");
  44. }
  45. static void
  46. parse_qmail_recv (rspamd_mempool_t * pool,
  47. gchar *line,
  48. struct received_header *r)
  49. {
  50. gchar *s, *p, t;
  51. /* We are interested only with received from network headers */
  52. if ((p = strstr (line, "from network")) == NULL) {
  53. r->is_error = 2;
  54. return;
  55. }
  56. p += sizeof ("from network") - 1;
  57. while (g_ascii_isspace (*p) || *p == '[') {
  58. p++;
  59. }
  60. /* format is ip/host */
  61. s = p;
  62. if (*p) {
  63. while (g_ascii_isdigit (*++p) || *p == '.') ;
  64. if (*p != '/') {
  65. r->is_error = 1;
  66. return;
  67. }
  68. else {
  69. *p = '\0';
  70. r->real_ip = rspamd_mempool_strdup (pool, s);
  71. *p = '/';
  72. /* Now try to parse hostname */
  73. s = ++p;
  74. while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p ==
  75. '_') {
  76. p++;
  77. }
  78. t = *p;
  79. *p = '\0';
  80. r->real_hostname = rspamd_mempool_strdup (pool, s);
  81. *p = t;
  82. }
  83. }
  84. }
  85. static void
  86. parse_recv_header (rspamd_mempool_t * pool,
  87. struct raw_header *rh,
  88. struct received_header *r)
  89. {
  90. gchar *p, *s, t, **res = NULL;
  91. gchar *line;
  92. enum {
  93. RSPAMD_RECV_STATE_INIT = 0,
  94. RSPAMD_RECV_STATE_FROM,
  95. RSPAMD_RECV_STATE_IP_BLOCK,
  96. RSPAMD_RECV_STATE_BRACES_BLOCK,
  97. RSPAMD_RECV_STATE_BY_BLOCK,
  98. RSPAMD_RECV_STATE_PARSE_IP,
  99. RSPAMD_RECV_STATE_PARSE_IP6,
  100. RSPAMD_RECV_STATE_SKIP_SPACES,
  101. RSPAMD_RECV_STATE_ERROR
  102. } state = RSPAMD_RECV_STATE_INIT, next_state = RSPAMD_RECV_STATE_INIT;
  103. gboolean is_exim = FALSE;
  104. line = rh->decoded;
  105. if (line == NULL) {
  106. return;
  107. }
  108. g_strstrip (line);
  109. p = line;
  110. s = line;
  111. while (*p) {
  112. switch (state) {
  113. /* Initial state, search for from */
  114. case RSPAMD_RECV_STATE_INIT:
  115. if (*p == 'f' || *p == 'F') {
  116. if (g_ascii_tolower (*++p) == 'r' && g_ascii_tolower (*++p) ==
  117. 'o' && g_ascii_tolower (*++p) == 'm') {
  118. p++;
  119. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  120. next_state = RSPAMD_RECV_STATE_FROM;
  121. }
  122. }
  123. else if (g_ascii_tolower (*p) == 'b' &&
  124. g_ascii_tolower (*(p + 1)) == 'y') {
  125. state = RSPAMD_RECV_STATE_IP_BLOCK;
  126. }
  127. else {
  128. /* This can be qmail header, parse it separately */
  129. parse_qmail_recv (pool, line, r);
  130. return;
  131. }
  132. break;
  133. /* Read hostname */
  134. case RSPAMD_RECV_STATE_FROM:
  135. if (*p == '[') {
  136. /* This should be IP address */
  137. res = &r->from_ip;
  138. state = RSPAMD_RECV_STATE_PARSE_IP;
  139. next_state = RSPAMD_RECV_STATE_IP_BLOCK;
  140. s = ++p;
  141. }
  142. else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p ==
  143. '_') {
  144. p++;
  145. }
  146. else {
  147. t = *p;
  148. *p = '\0';
  149. r->from_hostname = rspamd_mempool_strdup (pool, s);
  150. *p = t;
  151. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  152. next_state = RSPAMD_RECV_STATE_IP_BLOCK;
  153. }
  154. break;
  155. /* Try to extract additional info */
  156. case RSPAMD_RECV_STATE_IP_BLOCK:
  157. /* Try to extract ip or () info or by */
  158. if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) ==
  159. 'y') {
  160. p += 2;
  161. /* Skip spaces after by */
  162. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  163. next_state = RSPAMD_RECV_STATE_BY_BLOCK;
  164. }
  165. else if (*p == '(') {
  166. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  167. next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
  168. p++;
  169. }
  170. else if (*p == '[') {
  171. /* Got ip before '(' so extract it */
  172. s = ++p;
  173. res = &r->from_ip;
  174. state = RSPAMD_RECV_STATE_PARSE_IP;
  175. next_state = RSPAMD_RECV_STATE_IP_BLOCK;
  176. }
  177. else {
  178. p++;
  179. }
  180. break;
  181. /* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */
  182. case RSPAMD_RECV_STATE_BRACES_BLOCK:
  183. /* End of block */
  184. if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' ||
  185. *p == '_' || *p == ':') {
  186. p++;
  187. }
  188. else if (*p == '[') {
  189. s = ++p;
  190. state = RSPAMD_RECV_STATE_PARSE_IP;
  191. res = &r->real_ip;
  192. next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
  193. }
  194. else {
  195. if (p > s) {
  196. /* Got some real hostname */
  197. /* check whether it is helo or p is not space symbol */
  198. if (!g_ascii_isspace (*p) || *(p + 1) != '[') {
  199. /* Exim style ([ip]:port helo=hostname) */
  200. if (*s == ':' && (g_ascii_isspace (*p) || *p == ')')) {
  201. /* Ip ending */
  202. is_exim = TRUE;
  203. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  204. next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
  205. }
  206. else if (p - s == 4 && memcmp (s, "helo=", 5) == 0) {
  207. p++;
  208. is_exim = TRUE;
  209. if (r->real_hostname == NULL && r->from_hostname !=
  210. NULL) {
  211. r->real_hostname = r->from_hostname;
  212. }
  213. s = p;
  214. while (*p != ')' && !g_ascii_isspace (*p) && *p !=
  215. '\0') {
  216. p++;
  217. }
  218. if (p > s) {
  219. r->from_hostname = rspamd_mempool_alloc (pool,
  220. p - s + 1);
  221. rspamd_strlcpy (r->from_hostname, s, p - s + 1);
  222. }
  223. }
  224. else if (p - s == 4 && memcmp (s, "port=", 5) == 0) {
  225. p++;
  226. is_exim = TRUE;
  227. while (g_ascii_isdigit (*p)) {
  228. p++;
  229. }
  230. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  231. next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
  232. }
  233. else if (*p == '=' && is_exim) {
  234. /* Just skip unknown pairs */
  235. p++;
  236. while (!g_ascii_isspace (*p) && *p != ')' && *p !=
  237. '\0') {
  238. p++;
  239. }
  240. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  241. next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
  242. }
  243. else {
  244. /* skip all */
  245. while (*p++ != ')' && *p != '\0') ;
  246. state = RSPAMD_RECV_STATE_IP_BLOCK;
  247. }
  248. }
  249. else {
  250. /* Postfix style (hostname [ip]) */
  251. t = *p;
  252. *p = '\0';
  253. r->real_hostname = rspamd_mempool_strdup (pool, s);
  254. *p = t;
  255. /* Now parse ip */
  256. p += 2;
  257. s = p;
  258. res = &r->real_ip;
  259. state = RSPAMD_RECV_STATE_PARSE_IP;
  260. next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
  261. continue;
  262. }
  263. if (*p == ')') {
  264. p++;
  265. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  266. next_state = RSPAMD_RECV_STATE_IP_BLOCK;
  267. }
  268. }
  269. else if (*p == ')') {
  270. p++;
  271. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  272. next_state = RSPAMD_RECV_STATE_IP_BLOCK;
  273. }
  274. else {
  275. r->is_error = 1;
  276. return;
  277. }
  278. }
  279. break;
  280. /* Got by word */
  281. case RSPAMD_RECV_STATE_BY_BLOCK:
  282. /* Here can be only hostname */
  283. if ((g_ascii_isalnum (*p) || *p == '.' || *p == '-'
  284. || *p == '_') && p[1] != '\0') {
  285. p++;
  286. }
  287. else {
  288. /* We got something like hostname */
  289. if (p[1] != '\0') {
  290. t = *p;
  291. *p = '\0';
  292. r->by_hostname = rspamd_mempool_strdup (pool, s);
  293. *p = t;
  294. }
  295. else {
  296. r->by_hostname = rspamd_mempool_strdup (pool, s);
  297. }
  298. /* Now end of parsing */
  299. if (is_exim) {
  300. /* Adjust for exim received */
  301. if (r->real_ip == NULL && r->from_ip != NULL) {
  302. r->real_ip = r->from_ip;
  303. }
  304. else if (r->from_ip == NULL && r->real_ip != NULL) {
  305. r->from_ip = r->real_ip;
  306. if (r->real_hostname == NULL && r->from_hostname !=
  307. NULL) {
  308. r->real_hostname = r->from_hostname;
  309. }
  310. }
  311. }
  312. return;
  313. }
  314. break;
  315. /* Extract ip */
  316. case RSPAMD_RECV_STATE_PARSE_IP:
  317. if (*p == 'I') {
  318. /* IPv6: */
  319. state = RSPAMD_RECV_STATE_PARSE_IP6;
  320. }
  321. else {
  322. while (g_ascii_isxdigit (*p) || *p == '.' || *p == ':') {
  323. p++;
  324. }
  325. if (*p != ']') {
  326. /* Not an ip in fact */
  327. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  328. p++;
  329. }
  330. else {
  331. *p = '\0';
  332. *res = rspamd_mempool_strdup (pool, s);
  333. *p = ']';
  334. p++;
  335. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  336. }
  337. }
  338. break;
  339. case RSPAMD_RECV_STATE_PARSE_IP6:
  340. if (g_ascii_strncasecmp (p, "IPv6:", sizeof ("IPv6") - 1) == 0) {
  341. p += sizeof ("IPv6") - 1;
  342. s = p;
  343. state = RSPAMD_RECV_STATE_PARSE_IP;
  344. }
  345. else {
  346. state = RSPAMD_RECV_STATE_SKIP_SPACES;
  347. }
  348. break;
  349. /* Skip spaces */
  350. case RSPAMD_RECV_STATE_SKIP_SPACES:
  351. if (!g_ascii_isspace (*p)) {
  352. state = next_state;
  353. s = p;
  354. }
  355. else {
  356. p++;
  357. }
  358. break;
  359. default:
  360. r->is_error = 1;
  361. return;
  362. break;
  363. }
  364. }
  365. r->is_error = 1;
  366. return;
  367. }
  368. static void
  369. append_raw_header (struct rspamd_task *task,
  370. GHashTable *target, struct raw_header *rh)
  371. {
  372. struct raw_header *lp;
  373. rh->next = NULL;
  374. rh->prev = rh;
  375. if ((lp =
  376. g_hash_table_lookup (target, rh->name)) != NULL) {
  377. DL_APPEND (lp, rh);
  378. }
  379. else {
  380. g_hash_table_insert (target, rh->name, rh);
  381. }
  382. msg_debug_task ("add raw header %s: %s", rh->name, rh->value);
  383. }
  384. /* Convert raw headers to a list of struct raw_header * */
  385. static void
  386. process_raw_headers (struct rspamd_task *task, GHashTable *target,
  387. const gchar *in, gsize len)
  388. {
  389. struct raw_header *new = NULL;
  390. const gchar *p, *c, *end;
  391. gchar *tmp, *tp;
  392. gint state = 0, l, next_state = 100, err_state = 100, t_state;
  393. gboolean valid_folding = FALSE;
  394. p = in;
  395. end = p + len;
  396. c = p;
  397. while (p < end) {
  398. /* FSM for processing headers */
  399. switch (state) {
  400. case 0:
  401. /* Begin processing headers */
  402. if (!g_ascii_isalpha (*p)) {
  403. /* We have some garbage at the beginning of headers, skip this line */
  404. state = 100;
  405. next_state = 0;
  406. }
  407. else {
  408. state = 1;
  409. c = p;
  410. }
  411. break;
  412. case 1:
  413. /* We got something like header's name */
  414. if (*p == ':') {
  415. new =
  416. rspamd_mempool_alloc0 (task->task_pool,
  417. sizeof (struct raw_header));
  418. new->prev = new;
  419. l = p - c;
  420. tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
  421. rspamd_strlcpy (tmp, c, l + 1);
  422. new->name = tmp;
  423. new->empty_separator = TRUE;
  424. p++;
  425. state = 2;
  426. c = p;
  427. }
  428. else if (g_ascii_isspace (*p)) {
  429. /* Not header but some garbage */
  430. task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
  431. state = 100;
  432. next_state = 0;
  433. }
  434. else {
  435. p++;
  436. }
  437. break;
  438. case 2:
  439. /* We got header's name, so skip any \t or spaces */
  440. if (*p == '\t') {
  441. new->tab_separated = TRUE;
  442. new->empty_separator = FALSE;
  443. p++;
  444. }
  445. else if (*p == ' ') {
  446. new->empty_separator = FALSE;
  447. p++;
  448. }
  449. else if (*p == '\n' || *p == '\r') {
  450. /* Process folding */
  451. state = 99;
  452. l = p - c;
  453. if (l > 0) {
  454. tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
  455. rspamd_strlcpy (tmp, c, l + 1);
  456. new->separator = tmp;
  457. }
  458. next_state = 3;
  459. err_state = 5;
  460. c = p;
  461. }
  462. else {
  463. /* Process value */
  464. l = p - c;
  465. if (l >= 0) {
  466. tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
  467. rspamd_strlcpy (tmp, c, l + 1);
  468. new->separator = tmp;
  469. }
  470. c = p;
  471. state = 3;
  472. }
  473. break;
  474. case 3:
  475. if (*p == '\r' || *p == '\n') {
  476. /* Hold folding */
  477. state = 99;
  478. next_state = 3;
  479. err_state = 4;
  480. }
  481. else if (p + 1 == end) {
  482. state = 4;
  483. }
  484. else {
  485. p++;
  486. }
  487. break;
  488. case 4:
  489. /* Copy header's value */
  490. l = p - c;
  491. tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
  492. tp = tmp;
  493. t_state = 0;
  494. while (l--) {
  495. if (t_state == 0) {
  496. /* Before folding */
  497. if (*c == '\n' || *c == '\r') {
  498. t_state = 1;
  499. c++;
  500. *tp++ = ' ';
  501. }
  502. else {
  503. *tp++ = *c++;
  504. }
  505. }
  506. else if (t_state == 1) {
  507. /* Inside folding */
  508. if (g_ascii_isspace (*c)) {
  509. c++;
  510. }
  511. else {
  512. t_state = 0;
  513. *tp++ = *c++;
  514. }
  515. }
  516. }
  517. /* Strip last space that can be added by \r\n parsing */
  518. if (*(tp - 1) == ' ') {
  519. tp--;
  520. }
  521. *tp = '\0';
  522. /* Strip the initial spaces that could also be added by folding */
  523. while (*tmp != '\0' && g_ascii_isspace (*tmp)) {
  524. tmp ++;
  525. }
  526. new->value = tmp;
  527. new->decoded = g_mime_utils_header_decode_text (new->value);
  528. rspamd_mempool_add_destructor (task->task_pool,
  529. (rspamd_mempool_destruct_t)g_free, new->decoded);
  530. append_raw_header (task, target, new);
  531. state = 0;
  532. break;
  533. case 5:
  534. /* Header has only name, no value */
  535. new->value = "";
  536. new->decoded = NULL;
  537. append_raw_header (task, target, new);
  538. state = 0;
  539. break;
  540. case 99:
  541. /* Folding state */
  542. if (p + 1 == end) {
  543. state = err_state;
  544. }
  545. else {
  546. if (*p == '\r' || *p == '\n') {
  547. p++;
  548. valid_folding = FALSE;
  549. }
  550. else if (*p == '\t' || *p == ' ') {
  551. /* Valid folding */
  552. p++;
  553. valid_folding = TRUE;
  554. }
  555. else {
  556. if (valid_folding) {
  557. debug_task ("go to state: %d->%d", state, next_state);
  558. state = next_state;
  559. }
  560. else {
  561. /* Fall back */
  562. debug_task ("go to state: %d->%d", state, err_state);
  563. state = err_state;
  564. }
  565. }
  566. }
  567. break;
  568. case 100:
  569. /* Fail state, skip line */
  570. if (*p == '\r') {
  571. if (*(p + 1) == '\n') {
  572. p++;
  573. }
  574. p++;
  575. state = next_state;
  576. }
  577. else if (*p == '\n') {
  578. if (*(p + 1) == '\r') {
  579. p++;
  580. }
  581. p++;
  582. state = next_state;
  583. }
  584. else if (p + 1 == end) {
  585. state = next_state;
  586. p++;
  587. }
  588. else {
  589. p++;
  590. }
  591. break;
  592. }
  593. }
  594. }
  595. static void
  596. free_byte_array_callback (void *pointer)
  597. {
  598. GByteArray *arr = (GByteArray *) pointer;
  599. g_byte_array_free (arr, TRUE);
  600. }
  601. static gboolean
  602. charset_validate (rspamd_mempool_t *pool, const gchar *in, gchar **out)
  603. {
  604. /*
  605. * This is a simple routine to validate input charset
  606. * we just check that charset starts with alphanumeric and ends
  607. * with alphanumeric
  608. */
  609. const gchar *begin, *end;
  610. gboolean changed = FALSE, to_uppercase = FALSE;
  611. begin = in;
  612. while (!g_ascii_isalnum (*begin)) {
  613. begin ++;
  614. changed = TRUE;
  615. }
  616. if (!g_ascii_islower(*begin)) {
  617. changed = TRUE;
  618. to_uppercase = TRUE;
  619. }
  620. end = begin + strlen (begin) - 1;
  621. while (!g_ascii_isalnum (*end)) {
  622. end --;
  623. changed = TRUE;
  624. }
  625. if (!changed) {
  626. *out = (gchar *)in;
  627. }
  628. else {
  629. *out = rspamd_mempool_alloc (pool, end - begin + 2);
  630. if (to_uppercase) {
  631. gchar *o = *out;
  632. while (begin != end + 1) {
  633. if (g_ascii_islower (*begin)) {
  634. *o++ = g_ascii_toupper (*begin ++);
  635. }
  636. else {
  637. *o++ = *begin++;
  638. }
  639. }
  640. *o = '\0';
  641. }
  642. else {
  643. rspamd_strlcpy (*out, begin, end - begin + 2);
  644. }
  645. }
  646. return TRUE;
  647. }
  648. static GQuark
  649. converter_error_quark (void)
  650. {
  651. return g_quark_from_static_string ("conversion error");
  652. }
  653. static gchar *
  654. rspamd_text_to_utf8 (struct rspamd_task *task,
  655. gchar *input, gsize len, const gchar *in_enc,
  656. gsize *olen, GError **err)
  657. {
  658. gchar *res, *s, *d;
  659. gsize outlen;
  660. iconv_t ic;
  661. gsize processed, ret;
  662. ic = iconv_open (UTF8_CHARSET, in_enc);
  663. if (ic == (iconv_t)-1) {
  664. g_set_error (err, converter_error_quark(), EINVAL,
  665. "cannot open iconv for: %s", in_enc);
  666. return NULL;
  667. }
  668. /* For the most of charsets utf8 notation is larger than native one */
  669. outlen = len * 2 + 1;
  670. res = rspamd_mempool_alloc (task->task_pool, outlen);
  671. s = input;
  672. d = res;
  673. processed = outlen - 1;
  674. while (len > 0 && processed > 0) {
  675. ret = iconv (ic, &s, &len, &d, &processed);
  676. if (ret == (gsize)-1) {
  677. switch (errno) {
  678. case E2BIG:
  679. g_set_error (err, converter_error_quark(), EINVAL,
  680. "output of size %zd is not enough to handle "
  681. "converison of %zd bytes", outlen, len);
  682. iconv_close (ic);
  683. return NULL;
  684. case EILSEQ:
  685. case EINVAL:
  686. /* Ignore bad characters */
  687. if (processed > 0 && len > 0) {
  688. *d++ = '?';
  689. s++;
  690. len --;
  691. processed --;
  692. }
  693. break;
  694. }
  695. }
  696. else if (ret == 0) {
  697. break;
  698. }
  699. }
  700. *d = '\0';
  701. *olen = d - res;
  702. iconv_close (ic);
  703. return res;
  704. }
  705. static GByteArray *
  706. convert_text_to_utf (struct rspamd_task *task,
  707. GByteArray * part_content,
  708. GMimeContentType * type,
  709. struct mime_text_part *text_part)
  710. {
  711. GError *err = NULL;
  712. gsize write_bytes;
  713. const gchar *charset;
  714. gchar *res_str, *ocharset;
  715. GByteArray *result_array;
  716. if (task->cfg->raw_mode) {
  717. SET_PART_RAW (text_part);
  718. return part_content;
  719. }
  720. if (utf_compatible_re == NULL) {
  721. utf_compatible_re = rspamd_regexp_new (
  722. "^(?:utf-?8.*)|(?:us-ascii)|(?:ascii)|(?:us)|(?:ISO-8859-1)|"
  723. "(?:latin.*)|(?:CSASCII)$",
  724. "i", NULL);
  725. }
  726. if ((charset =
  727. g_mime_content_type_get_parameter (type, "charset")) == NULL) {
  728. SET_PART_RAW (text_part);
  729. return part_content;
  730. }
  731. if (!charset_validate (task->task_pool, charset, &ocharset)) {
  732. msg_info_task (
  733. "<%s>: has invalid charset",
  734. task->message_id);
  735. SET_PART_RAW (text_part);
  736. return part_content;
  737. }
  738. if (rspamd_regexp_match (utf_compatible_re, ocharset, strlen (ocharset), TRUE)) {
  739. if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
  740. SET_PART_UTF (text_part);
  741. return part_content;
  742. }
  743. else {
  744. msg_info_task (
  745. "<%s>: contains invalid utf8 characters, assume it as raw",
  746. task->message_id);
  747. SET_PART_RAW (text_part);
  748. return part_content;
  749. }
  750. }
  751. else {
  752. res_str = rspamd_text_to_utf8 (task, part_content->data,
  753. part_content->len,
  754. ocharset,
  755. &write_bytes,
  756. &err);
  757. if (res_str == NULL) {
  758. msg_warn_task ("<%s>: cannot convert from %s to utf8: %s",
  759. task->message_id,
  760. ocharset,
  761. err ? err->message : "unknown problem");
  762. SET_PART_RAW (text_part);
  763. g_error_free (err);
  764. return part_content;
  765. }
  766. }
  767. result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray));
  768. result_array->data = res_str;
  769. result_array->len = write_bytes;
  770. SET_PART_UTF (text_part);
  771. return result_array;
  772. }
  773. struct language_match {
  774. const char *code;
  775. const char *name;
  776. GUnicodeScript script;
  777. };
  778. static int
  779. language_elts_cmp (const void *a, const void *b)
  780. {
  781. GUnicodeScript sc = *(const GUnicodeScript *)a;
  782. const struct language_match *bb = (const struct language_match *)b;
  783. return (sc - bb->script);
  784. }
  785. static void
  786. detect_text_language (struct mime_text_part *part)
  787. {
  788. /* Keep sorted */
  789. static const struct language_match language_codes[] = {
  790. { "", "english", G_UNICODE_SCRIPT_COMMON },
  791. { "", "", G_UNICODE_SCRIPT_INHERITED },
  792. { "ar", "arabic", G_UNICODE_SCRIPT_ARABIC },
  793. { "hy", "armenian", G_UNICODE_SCRIPT_ARMENIAN },
  794. { "bn", "chineese", G_UNICODE_SCRIPT_BENGALI },
  795. { "", "", G_UNICODE_SCRIPT_BOPOMOFO },
  796. { "chr", "", G_UNICODE_SCRIPT_CHEROKEE },
  797. { "cop", "", G_UNICODE_SCRIPT_COPTIC },
  798. { "ru", "russian", G_UNICODE_SCRIPT_CYRILLIC },
  799. /* Deseret was used to write English */
  800. { "", "", G_UNICODE_SCRIPT_DESERET },
  801. { "hi", "", G_UNICODE_SCRIPT_DEVANAGARI },
  802. { "am", "", G_UNICODE_SCRIPT_ETHIOPIC },
  803. { "ka", "", G_UNICODE_SCRIPT_GEORGIAN },
  804. { "", "", G_UNICODE_SCRIPT_GOTHIC },
  805. { "el", "greek", G_UNICODE_SCRIPT_GREEK },
  806. { "gu", "", G_UNICODE_SCRIPT_GUJARATI },
  807. { "pa", "", G_UNICODE_SCRIPT_GURMUKHI },
  808. { "han", "chineese", G_UNICODE_SCRIPT_HAN },
  809. { "ko", "", G_UNICODE_SCRIPT_HANGUL },
  810. { "he", "hebrew", G_UNICODE_SCRIPT_HEBREW },
  811. { "ja", "", G_UNICODE_SCRIPT_HIRAGANA },
  812. { "kn", "", G_UNICODE_SCRIPT_KANNADA },
  813. { "ja", "", G_UNICODE_SCRIPT_KATAKANA },
  814. { "km", "", G_UNICODE_SCRIPT_KHMER },
  815. { "lo", "", G_UNICODE_SCRIPT_LAO },
  816. { "en", "english", G_UNICODE_SCRIPT_LATIN },
  817. { "ml", "", G_UNICODE_SCRIPT_MALAYALAM },
  818. { "mn", "", G_UNICODE_SCRIPT_MONGOLIAN },
  819. { "my", "", G_UNICODE_SCRIPT_MYANMAR },
  820. /* Ogham was used to write old Irish */
  821. { "", "", G_UNICODE_SCRIPT_OGHAM },
  822. { "", "", G_UNICODE_SCRIPT_OLD_ITALIC },
  823. { "or", "", G_UNICODE_SCRIPT_ORIYA },
  824. { "", "", G_UNICODE_SCRIPT_RUNIC },
  825. { "si", "", G_UNICODE_SCRIPT_SINHALA },
  826. { "syr", "", G_UNICODE_SCRIPT_SYRIAC },
  827. { "ta", "", G_UNICODE_SCRIPT_TAMIL },
  828. { "te", "", G_UNICODE_SCRIPT_TELUGU },
  829. { "dv", "", G_UNICODE_SCRIPT_THAANA },
  830. { "th", "", G_UNICODE_SCRIPT_THAI },
  831. { "bo", "", G_UNICODE_SCRIPT_TIBETAN },
  832. { "iu", "", G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL },
  833. { "", "", G_UNICODE_SCRIPT_YI },
  834. { "tl", "", G_UNICODE_SCRIPT_TAGALOG },
  835. /* Phillipino languages/scripts */
  836. { "hnn", "", G_UNICODE_SCRIPT_HANUNOO },
  837. { "bku", "", G_UNICODE_SCRIPT_BUHID },
  838. { "tbw", "", G_UNICODE_SCRIPT_TAGBANWA },
  839. { "", "", G_UNICODE_SCRIPT_BRAILLE },
  840. { "", "", G_UNICODE_SCRIPT_CYPRIOT },
  841. { "", "", G_UNICODE_SCRIPT_LIMBU },
  842. /* Used for Somali (so) in the past */
  843. { "", "", G_UNICODE_SCRIPT_OSMANYA },
  844. /* The Shavian alphabet was designed for English */
  845. { "", "", G_UNICODE_SCRIPT_SHAVIAN },
  846. { "", "", G_UNICODE_SCRIPT_LINEAR_B },
  847. { "", "", G_UNICODE_SCRIPT_TAI_LE },
  848. { "uga", "", G_UNICODE_SCRIPT_UGARITIC },
  849. { "", "", G_UNICODE_SCRIPT_NEW_TAI_LUE },
  850. { "bug", "", G_UNICODE_SCRIPT_BUGINESE },
  851. { "", "", G_UNICODE_SCRIPT_GLAGOLITIC },
  852. /* Used for for Berber (ber), but Arabic script is more common */
  853. { "", "", G_UNICODE_SCRIPT_TIFINAGH },
  854. { "syl", "", G_UNICODE_SCRIPT_SYLOTI_NAGRI },
  855. { "peo", "", G_UNICODE_SCRIPT_OLD_PERSIAN },
  856. { "", "", G_UNICODE_SCRIPT_KHAROSHTHI },
  857. { "", "", G_UNICODE_SCRIPT_UNKNOWN },
  858. { "", "", G_UNICODE_SCRIPT_BALINESE },
  859. { "", "", G_UNICODE_SCRIPT_CUNEIFORM },
  860. { "", "", G_UNICODE_SCRIPT_PHOENICIAN },
  861. { "", "", G_UNICODE_SCRIPT_PHAGS_PA },
  862. { "nqo", "", G_UNICODE_SCRIPT_NKO }
  863. };
  864. const struct language_match *lm;
  865. const int max_chars = 32;
  866. if (part != NULL) {
  867. if (IS_PART_UTF (part)) {
  868. /* Try to detect encoding by several symbols */
  869. const gchar *p, *pp;
  870. gunichar c;
  871. gint32 remain = part->content->len, max = 0, processed = 0;
  872. gint32 scripts[G_N_ELEMENTS (language_codes)];
  873. GUnicodeScript scc, sel = G_UNICODE_SCRIPT_COMMON;
  874. p = part->content->data;
  875. memset (scripts, 0, sizeof (scripts));
  876. while (remain > 0 && processed < max_chars) {
  877. c = g_utf8_get_char_validated (p, remain);
  878. if (c == (gunichar) -2 || c == (gunichar) -1) {
  879. break;
  880. }
  881. if (g_unichar_isalpha (c)) {
  882. scc = g_unichar_get_script (c);
  883. if (scc < (gint)G_N_ELEMENTS (scripts)) {
  884. scripts[scc]++;
  885. }
  886. processed ++;
  887. }
  888. pp = g_utf8_next_char (p);
  889. remain -= pp - p;
  890. p = pp;
  891. }
  892. for (remain = 0; remain < (gint)G_N_ELEMENTS (scripts); remain++) {
  893. if (scripts[remain] > max) {
  894. max = scripts[remain];
  895. sel = remain;
  896. }
  897. }
  898. part->script = sel;
  899. lm = bsearch (&sel, language_codes, G_N_ELEMENTS (language_codes),
  900. sizeof (language_codes[0]), &language_elts_cmp);
  901. if (lm != NULL) {
  902. part->lang_code = lm->code;
  903. part->language = lm->name;
  904. }
  905. }
  906. }
  907. }
  908. static void
  909. rspamd_normalize_text_part (struct rspamd_task *task,
  910. struct mime_text_part *part)
  911. {
  912. #ifdef WITH_SNOWBALL
  913. struct sb_stemmer *stem = NULL;
  914. #endif
  915. rspamd_ftok_t *w;
  916. const guchar *r;
  917. gchar *temp_word;
  918. guint i, nlen;
  919. #ifdef WITH_SNOWBALL
  920. if (part->language && part->language[0] != '\0' && IS_PART_UTF (part)) {
  921. stem = sb_stemmer_new (part->language, "UTF_8");
  922. if (stem == NULL) {
  923. msg_info_task ("<%s> cannot create lemmatizer for %s language",
  924. task->message_id, part->language);
  925. }
  926. }
  927. #endif
  928. /* Ugly workaround */
  929. part->normalized_words = rspamd_tokenize_text (part->content->data,
  930. part->content->len, IS_PART_UTF (part), task->cfg,
  931. part->urls_offset, FALSE,
  932. NULL);
  933. if (part->normalized_words) {
  934. for (i = 0; i < part->normalized_words->len; i ++) {
  935. w = &g_array_index (part->normalized_words, rspamd_ftok_t, i);
  936. r = NULL;
  937. #ifdef WITH_SNOWBALL
  938. if (stem) {
  939. r = sb_stemmer_stem (stem, w->begin, w->len);
  940. }
  941. #endif
  942. if (w->len > 0 && !(w->len == 6 && memcmp (w->begin, "!!EX!!", 6) == 0)) {
  943. if (r != NULL) {
  944. nlen = strlen (r);
  945. nlen = MIN (nlen, w->len);
  946. temp_word = rspamd_mempool_alloc (task->task_pool, nlen);
  947. memcpy (temp_word, r, nlen);
  948. w->begin = temp_word;
  949. w->len = nlen;
  950. }
  951. else {
  952. temp_word = rspamd_mempool_alloc (task->task_pool, w->len);
  953. memcpy (temp_word, w->begin, w->len);
  954. if (IS_PART_UTF (part)) {
  955. rspamd_str_lc_utf8 (temp_word, w->len);
  956. }
  957. else {
  958. rspamd_str_lc (temp_word, w->len);
  959. }
  960. w->begin = temp_word;
  961. }
  962. }
  963. }
  964. }
  965. #ifdef WITH_SNOWBALL
  966. if (stem != NULL) {
  967. sb_stemmer_delete (stem);
  968. }
  969. #endif
  970. }
  971. #define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
  972. static guint
  973. rspamd_words_levenshtein_distance (struct rspamd_task *task,
  974. GArray *w1, GArray *w2)
  975. {
  976. guint s1len, s2len, x, y, lastdiag, olddiag;
  977. guint *column;
  978. rspamd_ftok_t *s1, *s2;
  979. gint eq;
  980. static const guint max_words = 8192;
  981. s1len = w1->len;
  982. s2len = w2->len;
  983. if (s1len > max_words) {
  984. msg_err_task ("cannot compare parts with more than %ud words: %ud",
  985. max_words, s1len);
  986. return 0;
  987. }
  988. column = g_alloca ((s1len + 1) * sizeof (guint));
  989. for (y = 1; y <= s1len; y++) {
  990. column[y] = y;
  991. }
  992. for (x = 1; x <= s2len; x++) {
  993. column[0] = x;
  994. for (y = 1, lastdiag = x - 1; y <= s1len; y++) {
  995. olddiag = column[y];
  996. s1 = &g_array_index (w1, rspamd_ftok_t, y - 1);
  997. s2 = &g_array_index (w2, rspamd_ftok_t, x - 1);
  998. eq = rspamd_ftok_cmp (s1, s2) == 0 ? 0 : 1;
  999. column[y] = MIN3 (column[y] + 1, column[y - 1] + 1,
  1000. lastdiag + (eq));
  1001. lastdiag = olddiag;
  1002. }
  1003. }
  1004. return column[s1len];
  1005. }
  1006. static int
  1007. rspamd_gtube_cb (int strnum, int textpos, void *context)
  1008. {
  1009. return TRUE;
  1010. }
  1011. static gboolean
  1012. rspamd_check_gtube (struct rspamd_task *task, struct mime_text_part *part)
  1013. {
  1014. static ac_trie_pat_t pat[1] = {
  1015. {
  1016. .ptr = gtube_pattern,
  1017. .len = sizeof (gtube_pattern) - 1
  1018. }
  1019. };
  1020. gint state = 0;
  1021. g_assert (part != NULL);
  1022. if (gtube_trie == NULL) {
  1023. gtube_trie = acism_create (pat, G_N_ELEMENTS (pat));
  1024. }
  1025. if (part->content && part->content->len > sizeof (gtube_pattern)) {
  1026. if (acism_lookup (gtube_trie, part->content->data, part->content->len,
  1027. rspamd_gtube_cb, NULL, &state, FALSE)) {
  1028. task->flags |= RSPAMD_TASK_FLAG_SKIP;
  1029. task->flags |= RSPAMD_TASK_FLAG_GTUBE;
  1030. msg_info_task ("<%s>: gtube pattern has been found in part of length %ud",
  1031. task->message_id, part->content->len);
  1032. return TRUE;
  1033. }
  1034. }
  1035. return FALSE;
  1036. }
  1037. static void
  1038. process_text_part (struct rspamd_task *task,
  1039. GByteArray *part_content,
  1040. GMimeContentType *type,
  1041. struct mime_part *mime_part,
  1042. GMimeObject *parent,
  1043. gboolean is_empty)
  1044. {
  1045. struct mime_text_part *text_part;
  1046. const gchar *cd, *p, *c;
  1047. guint remain;
  1048. /* Skip attachements */
  1049. #ifndef GMIME24
  1050. cd = g_mime_part_get_content_disposition (GMIME_PART (mime_part->mime));
  1051. if (cd &&
  1052. g_ascii_strcasecmp (cd,
  1053. "attachment") == 0 && !task->cfg->check_text_attachements) {
  1054. debug_task ("skip attachments for checking as text parts");
  1055. return;
  1056. }
  1057. #else
  1058. cd = g_mime_object_get_disposition (GMIME_OBJECT (mime_part->mime));
  1059. if (cd &&
  1060. g_ascii_strcasecmp (cd,
  1061. GMIME_DISPOSITION_ATTACHMENT) == 0 &&
  1062. !task->cfg->check_text_attachements) {
  1063. debug_task ("skip attachments for checking as text parts");
  1064. return;
  1065. }
  1066. #endif
  1067. if (g_mime_content_type_is_type (type, "text",
  1068. "html") || g_mime_content_type_is_type (type, "text", "xhtml")) {
  1069. text_part =
  1070. rspamd_mempool_alloc0 (task->task_pool,
  1071. sizeof (struct mime_text_part));
  1072. text_part->flags |= RSPAMD_MIME_PART_FLAG_HTML;
  1073. if (is_empty) {
  1074. text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY;
  1075. text_part->orig = NULL;
  1076. text_part->content = NULL;
  1077. g_ptr_array_add (task->text_parts, text_part);
  1078. return;
  1079. }
  1080. text_part->orig = part_content;
  1081. part_content = convert_text_to_utf (task,
  1082. text_part->orig,
  1083. type,
  1084. text_part);
  1085. text_part->html = rspamd_mempool_alloc0 (task->task_pool,
  1086. sizeof (*text_part->html));
  1087. text_part->parent = parent;
  1088. text_part->mime_part = mime_part;
  1089. text_part->flags |= RSPAMD_MIME_PART_FLAG_BALANCED;
  1090. text_part->content = rspamd_html_process_part_full (
  1091. task->task_pool,
  1092. text_part->html,
  1093. part_content,
  1094. &text_part->urls_offset,
  1095. task->urls,
  1096. task->emails);
  1097. if (text_part->content->len == 0) {
  1098. text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY;
  1099. }
  1100. /* Handle offsets of this part */
  1101. if (text_part->urls_offset != NULL) {
  1102. text_part->urls_offset = g_list_reverse (text_part->urls_offset);
  1103. rspamd_mempool_add_destructor (task->task_pool,
  1104. (rspamd_mempool_destruct_t) g_list_free, text_part->urls_offset);
  1105. }
  1106. rspamd_mempool_add_destructor (task->task_pool,
  1107. (rspamd_mempool_destruct_t) free_byte_array_callback,
  1108. text_part->content);
  1109. g_ptr_array_add (task->text_parts, text_part);
  1110. }
  1111. else if (g_mime_content_type_is_type (type, "text", "*")) {
  1112. text_part =
  1113. rspamd_mempool_alloc0 (task->task_pool,
  1114. sizeof (struct mime_text_part));
  1115. text_part->parent = parent;
  1116. text_part->mime_part = mime_part;
  1117. if (is_empty) {
  1118. text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY;
  1119. text_part->orig = NULL;
  1120. text_part->content = NULL;
  1121. g_ptr_array_add (task->text_parts, text_part);
  1122. return;
  1123. }
  1124. text_part->content = convert_text_to_utf (task,
  1125. part_content,
  1126. type,
  1127. text_part);
  1128. text_part->orig = part_content;
  1129. rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
  1130. g_ptr_array_add (task->text_parts, text_part);
  1131. }
  1132. else {
  1133. return;
  1134. }
  1135. if (rspamd_check_gtube (task, text_part)) {
  1136. struct metric_result *mres;
  1137. mres = rspamd_create_metric_result (task, DEFAULT_METRIC);
  1138. if (mres != NULL) {
  1139. mres->score = mres->metric->actions[METRIC_ACTION_REJECT].score;
  1140. mres->action = METRIC_ACTION_REJECT;
  1141. }
  1142. task->pre_result.action = METRIC_ACTION_REJECT;
  1143. task->pre_result.str = "Gtube pattern";
  1144. rspamd_task_insert_result (task, GTUBE_SYMBOL, 0, NULL);
  1145. return;
  1146. }
  1147. /* Post process part */
  1148. detect_text_language (text_part);
  1149. rspamd_normalize_text_part (task, text_part);
  1150. /* Calculate number of lines */
  1151. p = text_part->content->data;
  1152. remain = text_part->content->len;
  1153. c = p;
  1154. while (p != NULL && remain > 0) {
  1155. p = memchr (c, '\n', remain);
  1156. if (p != NULL) {
  1157. text_part->nlines ++;
  1158. remain -= p - c + 1;
  1159. c = p + 1;
  1160. }
  1161. }
  1162. }
/* State passed as user data to mime_foreach_callback() while walking MIME parts */
struct mime_foreach_data {
	/* Task that receives the discovered parts */
	struct rspamd_task *task;
	/* Counter incremented for nested message parts and compared with RECURSION_LIMIT */
	guint parser_recursion;
	/* Last multipart container seen; stored as the parent of the parts that follow */
	GMimeObject *parent;
};
  1168. #ifdef GMIME24
  1169. static void
  1170. mime_foreach_callback (GMimeObject * parent,
  1171. GMimeObject * part,
  1172. gpointer user_data)
  1173. #else
  1174. static void
  1175. mime_foreach_callback (GMimeObject * part, gpointer user_data)
  1176. #endif
  1177. {
  1178. struct mime_foreach_data *md = user_data;
  1179. struct rspamd_task *task;
  1180. struct mime_part *mime_part;
  1181. GMimeContentType *type;
  1182. GMimeDataWrapper *wrapper;
  1183. GMimeStream *part_stream;
  1184. GByteArray *part_content;
  1185. gchar *hdrs;
  1186. task = md->task;
  1187. /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
  1188. /* find out what class 'part' is... */
  1189. if (GMIME_IS_MESSAGE_PART (part)) {
  1190. /* message/rfc822 or message/news */
  1191. GMimeMessage *message;
  1192. /* g_mime_message_foreach_part() won't descend into
  1193. child message parts, so if we want to count any
  1194. subparts of this child message, we'll have to call
  1195. g_mime_message_foreach_part() again here. */
  1196. message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
  1197. if (md->parser_recursion++ < RECURSION_LIMIT) {
  1198. #ifdef GMIME24
  1199. g_mime_message_foreach (message, mime_foreach_callback, md);
  1200. #else
  1201. g_mime_message_foreach_part (message, mime_foreach_callback, md);
  1202. #endif
  1203. }
  1204. else {
  1205. msg_err_task ("too deep mime recursion detected: %d", md->parser_recursion);
  1206. return;
  1207. }
  1208. #ifndef GMIME24
  1209. g_object_unref (message);
  1210. #endif
  1211. }
  1212. else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
  1213. /* message/partial */
  1214. /* this is an incomplete message part, probably a
  1215. large message that the sender has broken into
  1216. smaller parts and is sending us bit by bit. we
  1217. could save some info about it so that we could
  1218. piece this back together again once we get all the
  1219. parts? */
  1220. }
  1221. else if (GMIME_IS_MULTIPART (part)) {
  1222. /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
  1223. #ifndef GMIME24
  1224. debug_task ("detected multipart part");
  1225. /* we'll get to finding out if this is a signed/encrypted multipart later... */
  1226. if (task->parser_recursion++ < RECURSION_LIMIT) {
  1227. g_mime_multipart_foreach ((GMimeMultipart *) part,
  1228. mime_foreach_callback,
  1229. md);
  1230. }
  1231. else {
  1232. msg_err_task ("endless recursion detected: %d", task->parser_recursion);
  1233. return;
  1234. }
  1235. #endif
  1236. type = (GMimeContentType *) g_mime_object_get_content_type (GMIME_OBJECT (
  1237. part));
  1238. mime_part = rspamd_mempool_alloc0 (task->task_pool,
  1239. sizeof (struct mime_part));
  1240. hdrs = g_mime_object_get_headers (GMIME_OBJECT (part));
  1241. mime_part->raw_headers = g_hash_table_new (rspamd_strcase_hash,
  1242. rspamd_strcase_equal);
  1243. rspamd_mempool_add_destructor (task->task_pool,
  1244. (rspamd_mempool_destruct_t) g_hash_table_unref,
  1245. mime_part->raw_headers);
  1246. if (hdrs != NULL) {
  1247. process_raw_headers (task, mime_part->raw_headers,
  1248. hdrs, strlen (hdrs));
  1249. g_free (hdrs);
  1250. }
  1251. mime_part->type = type;
  1252. /* XXX: we don't need it, but it's sometimes dereferenced */
  1253. mime_part->content = g_byte_array_new ();
  1254. mime_part->parent = md->parent;
  1255. mime_part->filename = NULL;
  1256. mime_part->mime = part;
  1257. debug_task ("found part with content-type: %s/%s",
  1258. type->type,
  1259. type->subtype);
  1260. g_ptr_array_add (task->parts, mime_part);
  1261. md->parent = part;
  1262. }
  1263. else if (GMIME_IS_PART (part)) {
  1264. /* a normal leaf part, could be text/plain or image/jpeg etc */
  1265. #ifdef GMIME24
  1266. type = (GMimeContentType *) g_mime_object_get_content_type (GMIME_OBJECT (
  1267. part));
  1268. #else
  1269. type =
  1270. (GMimeContentType *) g_mime_part_get_content_type (GMIME_PART (part));
  1271. #endif
  1272. if (type == NULL) {
  1273. msg_warn_task ("type of part is unknown, assume text/plain");
  1274. type = g_mime_content_type_new ("text", "plain");
  1275. #ifdef GMIME24
  1276. rspamd_mempool_add_destructor (task->task_pool,
  1277. (rspamd_mempool_destruct_t) g_object_unref, type);
  1278. #else
  1279. rspamd_mempool_add_destructor (task->task_pool,
  1280. (rspamd_mempool_destruct_t) g_mime_content_type_destroy, type);
  1281. #endif
  1282. }
  1283. wrapper = g_mime_part_get_content_object (GMIME_PART (part));
  1284. #ifdef GMIME24
  1285. if (wrapper != NULL && GMIME_IS_DATA_WRAPPER (wrapper)) {
  1286. #else
  1287. if (wrapper != NULL) {
  1288. #endif
  1289. part_stream = g_mime_stream_mem_new ();
  1290. if (g_mime_data_wrapper_write_to_stream (wrapper,
  1291. part_stream) != -1) {
  1292. g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (
  1293. part_stream), FALSE);
  1294. part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (
  1295. part_stream));
  1296. g_object_unref (part_stream);
  1297. mime_part =
  1298. rspamd_mempool_alloc0 (task->task_pool,
  1299. sizeof (struct mime_part));
  1300. hdrs = g_mime_object_get_headers (GMIME_OBJECT (part));
  1301. mime_part->raw_headers = g_hash_table_new (rspamd_strcase_hash,
  1302. rspamd_strcase_equal);
  1303. rspamd_mempool_add_destructor (task->task_pool,
  1304. (rspamd_mempool_destruct_t) g_hash_table_unref,
  1305. mime_part->raw_headers);
  1306. if (hdrs != NULL) {
  1307. process_raw_headers (task, mime_part->raw_headers,
  1308. hdrs, strlen (hdrs));
  1309. g_free (hdrs);
  1310. }
  1311. mime_part->type = type;
  1312. mime_part->content = part_content;
  1313. mime_part->parent = md->parent;
  1314. mime_part->filename = g_mime_part_get_filename (GMIME_PART (
  1315. part));
  1316. mime_part->mime = part;
  1317. debug_task ("found part with content-type: %s/%s",
  1318. type->type,
  1319. type->subtype);
  1320. g_ptr_array_add (task->parts, mime_part);
  1321. /* Skip empty parts */
  1322. process_text_part (task,
  1323. part_content,
  1324. type,
  1325. mime_part,
  1326. md->parent,
  1327. (part_content->len <= 0));
  1328. }
  1329. else {
  1330. msg_warn_task ("write to stream failed: %d, %s", errno,
  1331. strerror (errno));
  1332. }
  1333. #ifndef GMIME24
  1334. g_object_unref (wrapper);
  1335. #endif
  1336. }
  1337. else {
  1338. msg_warn_task ("cannot get wrapper for mime part, type of part: %s/%s",
  1339. type->type,
  1340. type->subtype);
  1341. }
  1342. }
  1343. else {
  1344. g_assert_not_reached ();
  1345. }
  1346. }
  1347. static void
  1348. destroy_message (void *pointer)
  1349. {
  1350. GMimeMessage *msg = pointer;
  1351. g_object_unref (msg);
  1352. }
  1353. /* Creates message from various data using libmagic to detect type */
  1354. static void
  1355. rspamd_message_from_data (struct rspamd_task *task, GByteArray *data,
  1356. GMimeStream *stream)
  1357. {
  1358. GMimeMessage *message;
  1359. GMimePart *part;
  1360. GMimeDataWrapper *wrapper;
  1361. GMimeContentType *ct = NULL;
  1362. const char *mb = NULL;
  1363. gchar *mid;
  1364. g_assert (data != NULL);
  1365. message = g_mime_message_new (TRUE);
  1366. task->message = message;
  1367. if (task->from_envelope) {
  1368. g_mime_message_set_sender (task->message,
  1369. rspamd_task_get_sender (task));
  1370. }
  1371. if (task->cfg->libs_ctx) {
  1372. mb = magic_buffer (task->cfg->libs_ctx->libmagic,
  1373. data->data,
  1374. data->len);
  1375. if (mb) {
  1376. ct = g_mime_content_type_new_from_string (mb);
  1377. }
  1378. }
  1379. msg_warn_task ("construct fake mime of type: %s", mb);
  1380. part = g_mime_part_new ();
  1381. if (ct != NULL) {
  1382. g_mime_object_set_content_type (GMIME_OBJECT (part), ct);
  1383. g_object_unref (ct);
  1384. }
  1385. #ifdef GMIME24
  1386. wrapper = g_mime_data_wrapper_new_with_stream (stream,
  1387. GMIME_CONTENT_ENCODING_8BIT);
  1388. #else
  1389. wrapper = g_mime_data_wrapper_new_with_stream (stream,
  1390. GMIME_PART_ENCODING_8BIT);
  1391. #endif
  1392. g_mime_part_set_content_object (part, wrapper);
  1393. g_mime_message_set_mime_part (task->message, GMIME_OBJECT (part));
  1394. /* Register destructors */
  1395. rspamd_mempool_add_destructor (task->task_pool,
  1396. (rspamd_mempool_destruct_t) g_object_unref, wrapper);
  1397. rspamd_mempool_add_destructor (task->task_pool,
  1398. (rspamd_mempool_destruct_t) g_object_unref, part);
  1399. rspamd_mempool_add_destructor (task->task_pool,
  1400. (rspamd_mempool_destruct_t) destroy_message, task->message);
  1401. /* Generate message ID */
  1402. mid = g_mime_utils_generate_message_id ("localhost.localdomain");
  1403. rspamd_mempool_add_destructor (task->task_pool,
  1404. (rspamd_mempool_destruct_t) g_free, mid);
  1405. g_mime_message_set_message_id (task->message, mid);
  1406. task->message_id = mid;
  1407. task->queue_id = mid;
  1408. /* Set headers for message */
  1409. if (task->subject) {
  1410. g_mime_message_set_subject (task->message, task->subject);
  1411. }
  1412. }
  1413. gboolean
  1414. rspamd_message_parse (struct rspamd_task *task)
  1415. {
  1416. GMimeMessage *message;
  1417. GMimeParser *parser;
  1418. GMimeStream *stream;
  1419. GByteArray *tmp;
  1420. GList *first, *cur;
  1421. GMimeObject *parent;
  1422. const GMimeContentType *ct;
  1423. struct raw_header *rh;
  1424. struct mime_text_part *p1, *p2;
  1425. struct mime_foreach_data md;
  1426. struct received_header *recv;
  1427. gchar *url_str;
  1428. const gchar *url_end, *p, *end;
  1429. struct rspamd_url *subject_url;
  1430. gsize len;
  1431. goffset hdr_pos;
  1432. gint rc, state = 0, diff, *pdiff;
  1433. guint tw, dw;
  1434. tmp = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray));
  1435. p = task->msg.begin;
  1436. len = task->msg.len;
  1437. /* Skip any space characters to avoid some bad messages to be unparsed */
  1438. while (len > 0 && g_ascii_isspace (*p)) {
  1439. p ++;
  1440. len --;
  1441. }
  1442. tmp->data = (guint8 *)p;
  1443. tmp->len = len;
  1444. stream = g_mime_stream_mem_new_with_byte_array (tmp);
  1445. /*
  1446. * This causes g_mime_stream not to free memory by itself as it is memory allocated by
  1447. * pool allocator
  1448. */
  1449. g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE);
  1450. if (task->flags & RSPAMD_TASK_FLAG_MIME) {
  1451. debug_task ("construct mime parser from string length %d",
  1452. (gint) task->msg.len);
  1453. /* create a new parser object to parse the stream */
  1454. parser = g_mime_parser_new_with_stream (stream);
  1455. /* parse the message from the stream */
  1456. message = g_mime_parser_construct_message (parser);
  1457. if (message == NULL) {
  1458. if (!task->cfg->allow_raw_input) {
  1459. msg_err_task ("cannot construct mime from stream");
  1460. g_set_error (&task->err,
  1461. rspamd_message_quark (),
  1462. RSPAMD_FILTER_ERROR, \
  1463. "cannot parse MIME in the message");
  1464. /* TODO: backport to 0.9 */
  1465. g_object_unref (parser);
  1466. return FALSE;
  1467. }
  1468. else {
  1469. task->flags &= ~RSPAMD_TASK_FLAG_MIME;
  1470. rspamd_message_from_data (task, tmp, stream);
  1471. }
  1472. }
  1473. else {
  1474. GString str;
  1475. task->message = message;
  1476. rspamd_mempool_add_destructor (task->task_pool,
  1477. (rspamd_mempool_destruct_t) destroy_message, task->message);
  1478. str.str = tmp->data;
  1479. str.len = tmp->len;
  1480. hdr_pos = rspamd_string_find_eoh (&str);
  1481. if (hdr_pos > 0 && hdr_pos < tmp->len) {
  1482. task->raw_headers_content.begin = (gchar *) (p);
  1483. task->raw_headers_content.len = (guint64) (hdr_pos);
  1484. if (task->raw_headers_content.len > 0) {
  1485. process_raw_headers (task, task->raw_headers,
  1486. task->raw_headers_content.begin,
  1487. task->raw_headers_content.len);
  1488. }
  1489. }
  1490. }
  1491. /* free the parser (and the stream) */
  1492. g_object_unref (stream);
  1493. g_object_unref (parser);
  1494. }
  1495. else {
  1496. task->flags &= ~RSPAMD_TASK_FLAG_MIME;
  1497. rspamd_message_from_data (task, tmp, stream);
  1498. g_object_unref (stream);
  1499. }
  1500. /* Save message id for future use */
  1501. task->message_id = g_mime_message_get_message_id (task->message);
  1502. if (task->message_id == NULL) {
  1503. task->message_id = "undef";
  1504. }
  1505. memset (&md, 0, sizeof (md));
  1506. md.task = task;
  1507. #ifdef GMIME24
  1508. g_mime_message_foreach (task->message, mime_foreach_callback, &md);
  1509. #else
  1510. /*
  1511. * This is rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback
  1512. * so we need to set up parent part by hands
  1513. */
  1514. md.parent = g_mime_message_get_mime_part (task->message);
  1515. g_object_unref (md.parent);
  1516. g_mime_message_foreach_part (task->message, mime_foreach_callback, &md);
  1517. #endif
  1518. debug_task ("found %ud parts in message", task->parts->len);
  1519. if (task->queue_id == NULL) {
  1520. task->queue_id = "undef";
  1521. }
  1522. rspamd_images_process (task);
  1523. /* Parse received headers */
  1524. first =
  1525. rspamd_message_get_header (task, "Received", FALSE);
  1526. cur = first;
  1527. while (cur) {
  1528. recv =
  1529. rspamd_mempool_alloc0 (task->task_pool,
  1530. sizeof (struct received_header));
  1531. parse_recv_header (task->task_pool, cur->data, recv);
  1532. g_ptr_array_add (task->received, recv);
  1533. cur = g_list_next (cur);
  1534. }
  1535. /* Extract data from received header if we were not given IP */
  1536. if (task->received->len > 0 && (task->flags & RSPAMD_TASK_FLAG_NO_IP)) {
  1537. recv = g_ptr_array_index (task->received, 0);
  1538. if (recv->real_ip) {
  1539. if (!rspamd_parse_inet_address (&task->from_addr,
  1540. recv->real_ip,
  1541. 0)) {
  1542. msg_warn_task ("cannot get IP from received header: '%s'",
  1543. recv->real_ip);
  1544. task->from_addr = NULL;
  1545. }
  1546. }
  1547. if (recv->real_hostname) {
  1548. task->hostname = recv->real_hostname;
  1549. }
  1550. }
  1551. if (task->from_envelope == NULL) {
  1552. first = rspamd_message_get_header (task, "Return-Path", FALSE);
  1553. if (first) {
  1554. rh = first->data;
  1555. task->from_envelope = internet_address_list_parse_string (rh->value);
  1556. if (task->from_envelope) {
  1557. #ifdef GMIME24
  1558. rspamd_mempool_add_destructor (task->task_pool,
  1559. (rspamd_mempool_destruct_t) g_object_unref,
  1560. task->from_envelope);
  1561. #else
  1562. rspamd_mempool_add_destructor (task->task_pool,
  1563. (rspamd_mempool_destruct_t) internet_address_list_destroy,
  1564. task->from_envelope);
  1565. #endif
  1566. }
  1567. }
  1568. }
  1569. if (task->deliver_to == NULL) {
  1570. first = rspamd_message_get_header (task, "Delivered-To", FALSE);
  1571. if (first) {
  1572. rh = first->data;
  1573. task->deliver_to = rspamd_mempool_strdup (task->task_pool, rh->decoded);
  1574. }
  1575. }
  1576. /* Set mime recipients and sender for the task */
  1577. task->rcpt_mime = g_mime_message_get_all_recipients (task->message);
  1578. if (task->rcpt_mime) {
  1579. #ifdef GMIME24
  1580. rspamd_mempool_add_destructor (task->task_pool,
  1581. (rspamd_mempool_destruct_t) g_object_unref,
  1582. task->rcpt_mime);
  1583. #else
  1584. rspamd_mempool_add_destructor (task->task_pool,
  1585. (rspamd_mempool_destruct_t) internet_address_list_destroy,
  1586. task->rcpt_mime);
  1587. #endif
  1588. }
  1589. first = rspamd_message_get_header (task, "From", FALSE);
  1590. if (first) {
  1591. rh = first->data;
  1592. task->from_mime = internet_address_list_parse_string (rh->value);
  1593. if (task->from_mime) {
  1594. #ifdef GMIME24
  1595. rspamd_mempool_add_destructor (task->task_pool,
  1596. (rspamd_mempool_destruct_t) g_object_unref,
  1597. task->from_mime);
  1598. #else
  1599. rspamd_mempool_add_destructor (task->task_pool,
  1600. (rspamd_mempool_destruct_t) internet_address_list_destroy,
  1601. task->from_mime);
  1602. #endif
  1603. }
  1604. }
  1605. /* Parse urls inside Subject header */
  1606. cur = rspamd_message_get_header (task, "Subject", FALSE);
  1607. if (cur) {
  1608. p = cur->data;
  1609. len = strlen (p);
  1610. end = p + len;
  1611. while (p < end) {
  1612. /* Search to the end of url */
  1613. if (rspamd_url_find (task->task_pool, p, end - p, NULL, &url_end,
  1614. &url_str, FALSE, &state)) {
  1615. if (url_str != NULL) {
  1616. subject_url = rspamd_mempool_alloc0 (task->task_pool,
  1617. sizeof (struct rspamd_url));
  1618. rc = rspamd_url_parse (subject_url, url_str,
  1619. strlen (url_str), task->task_pool);
  1620. if ((rc == URI_ERRNO_OK) && subject_url->hostlen > 0) {
  1621. if (subject_url->protocol != PROTOCOL_MAILTO) {
  1622. if (!g_hash_table_lookup (task->urls, subject_url)) {
  1623. g_hash_table_insert (task->urls,
  1624. subject_url,
  1625. subject_url);
  1626. }
  1627. }
  1628. }
  1629. else if (rc != URI_ERRNO_OK) {
  1630. msg_info_task ("extract of url '%s' failed: %s",
  1631. url_str,
  1632. rspamd_url_strerror (rc));
  1633. }
  1634. }
  1635. }
  1636. else {
  1637. break;
  1638. }
  1639. p = url_end + 1;
  1640. }
  1641. }
  1642. /* Calculate distance for 2-parts messages */
  1643. if (task->text_parts->len == 2) {
  1644. p1 = g_ptr_array_index (task->text_parts, 0);
  1645. p2 = g_ptr_array_index (task->text_parts, 1);
  1646. /* First of all check parent object */
  1647. if (p1->parent && p1->parent == p2->parent) {
  1648. parent = p1->parent;
  1649. ct = g_mime_object_get_content_type (parent);
  1650. if (ct == NULL ||
  1651. !g_mime_content_type_is_type ((GMimeContentType *)ct,
  1652. "multipart", "alternative")) {
  1653. debug_task (
  1654. "two parts are not belong to multipart/alternative container, skip check");
  1655. }
  1656. else {
  1657. if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2) &&
  1658. p1->normalized_words && p2->normalized_words) {
  1659. tw = MAX (p1->normalized_words->len, p2->normalized_words->len);
  1660. if (tw > 0) {
  1661. dw = rspamd_words_levenshtein_distance (task,
  1662. p1->normalized_words,
  1663. p2->normalized_words);
  1664. diff = (100.0 * (gdouble)(tw - dw) / (gdouble)tw);
  1665. debug_task (
  1666. "different words: %d, total words: %d, "
  1667. "got likeliness between parts of %d%%",
  1668. dw, tw,
  1669. diff);
  1670. pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
  1671. *pdiff = diff;
  1672. rspamd_mempool_set_variable (task->task_pool,
  1673. "parts_distance",
  1674. pdiff,
  1675. NULL);
  1676. }
  1677. }
  1678. }
  1679. }
  1680. else {
  1681. debug_task (
  1682. "message contains two parts but they are in different multi-parts");
  1683. }
  1684. }
  1685. else {
  1686. debug_task (
  1687. "message has too many text parts, so do not try to compare "
  1688. "them with each other");
  1689. }
  1690. if (task->queue_id) {
  1691. msg_info_task ("loaded message; id: <%s>; queue-id: <%s>",
  1692. task->message_id, task->queue_id);
  1693. }
  1694. else {
  1695. msg_info_task ("loaded message; id: <%s>",
  1696. task->message_id);
  1697. }
  1698. return TRUE;
  1699. }
  1700. GList *
  1701. rspamd_message_get_header (struct rspamd_task *task,
  1702. const gchar *field,
  1703. gboolean strong)
  1704. {
  1705. GList *gret = NULL;
  1706. struct raw_header *rh;
  1707. rh = g_hash_table_lookup (task->raw_headers, field);
  1708. if (rh == NULL) {
  1709. return NULL;
  1710. }
  1711. while (rh) {
  1712. if (strong) {
  1713. if (strcmp (rh->name, field) == 0) {
  1714. gret = g_list_prepend (gret, rh);
  1715. }
  1716. }
  1717. else {
  1718. gret = g_list_prepend (gret, rh);
  1719. }
  1720. rh = rh->next;
  1721. }
  1722. if (gret != NULL) {
  1723. gret = g_list_reverse (gret);
  1724. rspamd_mempool_add_destructor (task->task_pool,
  1725. (rspamd_mempool_destruct_t)g_list_free, gret);
  1726. }
  1727. return gret;
  1728. }