You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

printf_check.cc 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. /*
  2. * Copyright (c) 2015, Vsevolod Stakhov
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
  14. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  15. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  16. * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
  17. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  18. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  19. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  20. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  21. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  22. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. */
  24. #include "printf_check.h"
  25. #include "clang/AST/AST.h"
  26. #include "clang/AST/Expr.h"
  27. #include "clang/AST/ASTConsumer.h"
  28. #include "clang/AST/RecursiveASTVisitor.h"
  29. #include <unordered_map>
  30. #include <vector>
  31. #include <sstream>
  32. #include <ctype.h>
  33. using namespace clang;
  34. namespace rspamd {
  35. struct PrintfArgChecker;
  36. static bool cstring_arg_handler (const Expr *arg,
  37. struct PrintfArgChecker *ctx);
  38. static bool int_arg_handler (const Expr *arg,
  39. struct PrintfArgChecker *ctx);
  40. static bool long_arg_handler (const Expr *arg,
  41. struct PrintfArgChecker *ctx);
  42. static bool size_arg_handler (const Expr *arg,
  43. struct PrintfArgChecker *ctx);
  44. static bool char_arg_handler (const Expr *arg,
  45. struct PrintfArgChecker *ctx);
  46. static bool double_arg_handler (const Expr *arg,
  47. struct PrintfArgChecker *ctx);
  48. static bool long_double_arg_handler (const Expr *arg,
  49. struct PrintfArgChecker *ctx);
  50. using arg_parser_t = bool (*) (const Expr *, struct PrintfArgChecker *);
  51. static void
  52. print_error (const std::string &err, const Expr *e, const ASTContext *ast)
  53. {
  54. auto const &sm = ast->getSourceManager ();
  55. auto loc = e->getExprLoc ();
  56. llvm::errs() << err << " at " << loc.printToString (sm) << "\n";
  57. }
  58. struct PrintfArgChecker {
  59. private:
  60. arg_parser_t parser;
  61. public:
  62. int width;
  63. int precision;
  64. bool is_unsigned;
  65. ASTContext *past;
  66. PrintfArgChecker (arg_parser_t _p, ASTContext *_ast) :
  67. parser(_p), past(_ast)
  68. {
  69. width = 0;
  70. precision = 0;
  71. is_unsigned = false;
  72. }
  73. virtual ~PrintfArgChecker () {}
  74. bool operator () (const Expr *e)
  75. {
  76. return parser (e, this);
  77. }
  78. };
  79. class PrintfCheckVisitor::impl {
  80. std::unordered_map<std::string, int> printf_functions;
  81. ASTContext *pcontext;
  82. std::unique_ptr<PrintfArgChecker> parseFlags (const std::string &flags)
  83. {
  84. auto type = flags.back();
  85. switch (type) {
  86. case 's':
  87. return llvm::make_unique<PrintfArgChecker>(cstring_arg_handler,
  88. this->pcontext);
  89. case 'd':
  90. return llvm::make_unique<PrintfArgChecker>(int_arg_handler,
  91. this->pcontext);
  92. case 'z':
  93. return llvm::make_unique<PrintfArgChecker> (size_arg_handler,
  94. this->pcontext);
  95. case 'l':
  96. return llvm::make_unique<PrintfArgChecker> (long_arg_handler,
  97. this->pcontext);
  98. case 'f':
  99. case 'g':
  100. return llvm::make_unique<PrintfArgChecker> (double_arg_handler,
  101. this->pcontext);
  102. case 'F':
  103. case 'G':
  104. return llvm::make_unique<PrintfArgChecker> (long_double_arg_handler,
  105. this->pcontext);
  106. case 'c':
  107. return llvm::make_unique<PrintfArgChecker> (char_arg_handler,
  108. this->pcontext);
  109. default:
  110. llvm::errs () << "unknown parser flag: " << type << "\n";
  111. break;
  112. }
  113. return nullptr;
  114. }
  115. std::shared_ptr<std::vector<PrintfArgChecker> >
  116. genParsers (const StringRef query)
  117. {
  118. enum {
  119. ignore_chars = 0,
  120. read_percent,
  121. read_width,
  122. read_precision,
  123. read_arg
  124. } state = ignore_chars;
  125. int width, precision;
  126. std::string flags;
  127. auto res = std::make_shared<std::vector<PrintfArgChecker> >();
  128. for (const auto c : query) {
  129. switch (state) {
  130. case ignore_chars:
  131. if (c == '%') {
  132. state = read_percent;
  133. flags.clear ();
  134. width = precision = 0;
  135. }
  136. break;
  137. case read_percent:
  138. if (isdigit (c)) {
  139. state = read_width;
  140. width = c - '0';
  141. }
  142. else if (c == '.') {
  143. state = read_precision;
  144. precision = c - '0';
  145. }
  146. else if (c == '*') {
  147. /* %*s - need integer argument */
  148. res->emplace_back (int_arg_handler, this->pcontext);
  149. state = read_arg;
  150. }
  151. else if (c == '%') {
  152. /* Percent character, ignore */
  153. state = ignore_chars;
  154. }
  155. else {
  156. flags.push_back (c);
  157. state = read_arg;
  158. }
  159. break;
  160. case read_width:
  161. if (isdigit (c)) {
  162. width *= 10;
  163. width += c - '0';
  164. }
  165. else if (c == '.') {
  166. state = read_precision;
  167. precision = c - '0';
  168. }
  169. else {
  170. flags.push_back (c);
  171. state = read_arg;
  172. }
  173. break;
  174. case read_precision:
  175. if (isdigit (c)) {
  176. precision *= 10;
  177. precision += c - '0';
  178. }
  179. else if (c == '*') {
  180. res->emplace_back (int_arg_handler, this->pcontext);
  181. state = read_arg;
  182. }
  183. else {
  184. flags.push_back (c);
  185. state = read_arg;
  186. }
  187. break;
  188. case read_arg:
  189. if (!isalpha (c)) {
  190. auto handler = parseFlags (flags);
  191. if (handler) {
  192. auto handler_copy = *handler;
  193. handler_copy.precision = precision;
  194. handler_copy.width = width;
  195. res->emplace_back (std::move (handler_copy));
  196. }
  197. else {
  198. llvm::errs () << "invalid modifier\n";
  199. return nullptr;
  200. }
  201. state = ignore_chars;
  202. }
  203. else {
  204. flags.push_back (c);
  205. }
  206. break;
  207. }
  208. }
  209. if (state == read_arg) {
  210. auto handler = parseFlags (flags);
  211. if (handler) {
  212. auto handler_copy = *handler;
  213. handler_copy.precision = precision;
  214. handler_copy.width = width;
  215. res->emplace_back (std::move (handler_copy));
  216. }
  217. else {
  218. llvm::errs () << "invalid modifier\n";
  219. return nullptr;
  220. }
  221. }
  222. return res;
  223. }
  224. public:
  225. impl (ASTContext *_ctx) : pcontext(_ctx)
  226. {
  227. /* name -> format string position */
  228. printf_functions = {
  229. {"rspamd_printf", 0},
  230. {"rspamd_default_log_function", 4},
  231. {"rspamd_snprintf", 2},
  232. {"rspamd_fprintf", 1}
  233. };
  234. };
  235. bool VisitCallExpr (CallExpr *E)
  236. {
  237. auto callee = dyn_cast<NamedDecl> (E->getCalleeDecl ());
  238. if (callee == NULL) {
  239. llvm::errs () << "Bad callee\n";
  240. return false;
  241. }
  242. auto fname = callee->getNameAsString ();
  243. auto pos_it = printf_functions.find (fname);
  244. if (pos_it != printf_functions.end ()) {
  245. const auto args = E->getArgs ();
  246. auto pos = pos_it->second;
  247. auto query = args[pos];
  248. if (!query->isEvaluatable (*pcontext)) {
  249. llvm::errs () << "Cannot evaluate query\n";
  250. return false;
  251. }
  252. clang::Expr::EvalResult r;
  253. if (!query->EvaluateAsRValue (r, *pcontext)) {
  254. llvm::errs () << "Cannot evaluate query\n";
  255. return false;
  256. }
  257. auto qval = dyn_cast<StringLiteral> (
  258. r.Val.getLValueBase ().get<const Expr *> ());
  259. if (!qval) {
  260. llvm::errs () << "Bad or absent query string\n";
  261. return false;
  262. }
  263. auto parsers = genParsers (qval->getString ());
  264. if (parsers) {
  265. if (parsers->size () != E->getNumArgs () - (pos + 1)) {
  266. std::ostringstream err_buf;
  267. err_buf << "number of arguments for " << fname
  268. << " missmatches query string '" <<
  269. qval->getString().str()
  270. << "', expected " << parsers->size () << " args"
  271. << ", got " << (E->getNumArgs () - (pos + 1))
  272. << " args";
  273. print_error (err_buf.str (), E, this->pcontext);
  274. return false;
  275. }
  276. else {
  277. for (auto i = pos + 1; i < E->getNumArgs (); i++) {
  278. auto arg = args[i];
  279. if (arg) {
  280. if (!parsers->at(i - (pos + 1))(arg)) {
  281. return false;
  282. }
  283. }
  284. }
  285. }
  286. }
  287. }
  288. return true;
  289. }
  290. };
  291. PrintfCheckVisitor::PrintfCheckVisitor (ASTContext *ctx) :
  292. pimpl { new impl(ctx) }
  293. {
  294. }
  295. PrintfCheckVisitor::~PrintfCheckVisitor ()
  296. {
  297. }
  298. bool PrintfCheckVisitor::VisitCallExpr (clang::CallExpr *E)
  299. {
  300. return pimpl->VisitCallExpr (E);
  301. }
  302. /* Type handlers */
  303. static bool
  304. cstring_arg_handler (const Expr *arg, struct PrintfArgChecker *ctx)
  305. {
  306. auto type = arg->getType ().split ().Ty;
  307. if (!type->isPointerType ()) {
  308. print_error (
  309. std::string ("bad string argument for %s: ") +
  310. arg->getType ().getAsString (), arg, ctx->past);
  311. return false;
  312. }
  313. auto ptr_type = type->getPointeeType().split().Ty;
  314. if (!ptr_type->isCharType ()) {
  315. /* We might have gchar * here */
  316. auto desugared_type = ptr_type->getUnqualifiedDesugaredType ();
  317. if (!desugared_type || !desugared_type->isCharType ()) {
  318. if (desugared_type) {
  319. desugared_type->dump ();
  320. }
  321. print_error (
  322. std::string ("bad string argument for %s: ") +
  323. arg->getType ().getAsString (), arg, ctx->past);
  324. return false;
  325. }
  326. }
  327. return true;
  328. }
  329. static bool
  330. int_arg_handler (const Expr *arg, struct PrintfArgChecker *ctx)
  331. {
  332. auto type = arg->getType ().split ().Ty;
  333. auto desugared_type = type->getUnqualifiedDesugaredType ();
  334. if (!desugared_type->isIntegerType ()) {
  335. print_error (std::string ("bad integer argument for %d or * arg: ") +
  336. arg->getType ().getAsString (), arg, ctx->past);
  337. return false;
  338. }
  339. else if (!desugared_type->isBuiltinType ()) {
  340. print_error (std::string ("bad integer argument for %d or * arg: ") +
  341. arg->getType ().getAsString(), arg, ctx->past);
  342. return false;
  343. }
  344. auto builtin_type = dyn_cast<BuiltinType>(desugared_type);
  345. auto kind = builtin_type->getKind ();
  346. if (kind != BuiltinType::Kind::UInt &&
  347. kind != BuiltinType::Kind::Int) {
  348. print_error (std::string ("bad integer argument for %d or * arg: ") +
  349. arg->getType ().getAsString (), arg, ctx->past);
  350. return false;
  351. }
  352. return true;
  353. }
  354. static bool
  355. long_arg_handler (const Expr *arg, struct PrintfArgChecker *ctx)
  356. {
  357. auto type = arg->getType ().split ().Ty;
  358. auto desugared_type = type->getUnqualifiedDesugaredType ();
  359. if (!desugared_type->isIntegerType ()) {
  360. print_error (
  361. std::string ("bad integer argument for %l arg: ") +
  362. arg->getType ().getAsString (), arg, ctx->past);
  363. return false;
  364. }
  365. else if (!desugared_type->isBuiltinType ()) {
  366. print_error (
  367. std::string ("bad integer argument for %l arg: ") +
  368. arg->getType ().getAsString (), arg, ctx->past);
  369. return false;
  370. }
  371. auto builtin_type = dyn_cast<BuiltinType> (desugared_type);
  372. auto kind = builtin_type->getKind ();
  373. if (kind != BuiltinType::Kind::ULong &&
  374. kind != BuiltinType::Kind::Long) {
  375. print_error (
  376. std::string ("bad integer argument for %l arg: ") +
  377. arg->getType ().getAsString (), arg, ctx->past);
  378. return false;
  379. }
  380. return true;
  381. }
  382. static bool
  383. char_arg_handler (const Expr *arg, struct PrintfArgChecker *ctx)
  384. {
  385. auto type = arg->getType ().split ().Ty;
  386. auto desugared_type = type->getUnqualifiedDesugaredType ();
  387. if (!desugared_type->isCharType ()) {
  388. print_error (
  389. std::string ("bad char argument for %c arg: ") +
  390. arg->getType ().getAsString (), arg, ctx->past);
  391. return false;
  392. }
  393. else if (!desugared_type->isBuiltinType ()) {
  394. print_error (
  395. std::string ("bad char argument for %c arg: ") +
  396. arg->getType ().getAsString (), arg, ctx->past);
  397. return false;
  398. }
  399. auto builtin_type = dyn_cast<BuiltinType> (desugared_type);
  400. auto kind = builtin_type->getKind ();
  401. if (kind != BuiltinType::Kind::UChar &&
  402. kind != BuiltinType::Kind::SChar) {
  403. print_error (
  404. std::string ("bad char argument for %c arg: ") +
  405. arg->getType ().getAsString (), arg, ctx->past);
  406. return false;
  407. }
  408. return true;
  409. }
  410. static bool
  411. size_arg_handler (const Expr *arg, struct PrintfArgChecker *ctx)
  412. {
  413. auto type = arg->getType ().split ().Ty;
  414. auto desugared_type = type->getUnqualifiedDesugaredType ();
  415. if (!desugared_type->isIntegerType ()) {
  416. print_error (
  417. std::string ("bad integer argument for %z arg: ") +
  418. arg->getType ().getAsString (), arg, ctx->past);
  419. return false;
  420. }
  421. else if (!desugared_type->isBuiltinType ()) {
  422. print_error (
  423. std::string ("bad integer argument for %z arg: ") +
  424. arg->getType ().getAsString (), arg, ctx->past);
  425. return false;
  426. }
  427. auto builtin_type = dyn_cast<BuiltinType> (desugared_type);
  428. auto kind = builtin_type->getKind ();
  429. if (sizeof (size_t) == sizeof (long)) {
  430. if (kind != BuiltinType::Kind::ULong &&
  431. kind != BuiltinType::Kind::Long) {
  432. print_error (
  433. std::string ("bad integer argument for %z arg: ") +
  434. arg->getType ().getAsString (), arg, ctx->past);
  435. return false;
  436. }
  437. }
  438. else if (sizeof (size_t) == sizeof (int)) {
  439. if (kind != BuiltinType::Kind::UInt &&
  440. kind != BuiltinType::Kind::Int) {
  441. print_error (
  442. std::string ("bad integer argument for %z arg: ") +
  443. arg->getType ().getAsString (), arg, ctx->past);
  444. return false;
  445. }
  446. }
  447. return true;
  448. }
  449. static bool
  450. double_arg_handler (const Expr *arg, struct PrintfArgChecker *ctx)
  451. {
  452. auto type = arg->getType ().split ().Ty;
  453. auto desugared_type = type->getUnqualifiedDesugaredType ();
  454. if (!desugared_type->isRealFloatingType ()) {
  455. print_error (
  456. std::string ("bad double argument for %f or %g arg: ") +
  457. arg->getType ().getAsString (), arg, ctx->past);
  458. return false;
  459. }
  460. else if (!desugared_type->isBuiltinType ()) {
  461. print_error (
  462. std::string ("bad double argument for %f or %g arg: ") +
  463. arg->getType ().getAsString (), arg, ctx->past);
  464. return false;
  465. }
  466. auto builtin_type = dyn_cast<BuiltinType> (desugared_type);
  467. auto kind = builtin_type->getKind ();
  468. if (kind != BuiltinType::Kind::Double) {
  469. print_error (
  470. std::string ("bad double argument for %f or %g arg: ") +
  471. arg->getType ().getAsString (), arg, ctx->past);
  472. return false;
  473. }
  474. return true;
  475. }
  476. static bool
  477. long_double_arg_handler (const Expr *arg, struct PrintfArgChecker *ctx)
  478. {
  479. auto type = arg->getType ().split ().Ty;
  480. auto desugared_type = type->getUnqualifiedDesugaredType ();
  481. if (!desugared_type->isRealFloatingType ()) {
  482. print_error (
  483. std::string ("bad long double argument for %F or %G arg: ") +
  484. arg->getType ().getAsString (), arg, ctx->past);
  485. return false;
  486. }
  487. else if (!desugared_type->isBuiltinType ()) {
  488. print_error (
  489. std::string ("bad long double argument for %F or %G arg: ") +
  490. arg->getType ().getAsString (), arg, ctx->past);
  491. return false;
  492. }
  493. auto builtin_type = dyn_cast<BuiltinType> (desugared_type);
  494. auto kind = builtin_type->getKind ();
  495. if (kind != BuiltinType::Kind::LongDouble) {
  496. print_error (
  497. std::string ("bad long double argument for %F or %G arg: ") +
  498. arg->getType ().getAsString (), arg, ctx->past);
  499. return false;
  500. }
  501. return true;
  502. }
  503. };