You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Congestion.cxx 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. /* Copyright 2009-2018 Pierre Ossman for Cendio AB
  2. *
  3. * This is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This software is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License
  14. * along with this software; if not, write to the Free Software
  15. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  16. * USA.
  17. */
  18. /*
  19. * This code implements congestion control in the same way as TCP in
  20. * order to avoid excessive latency in the transport. This is needed
  21. * because "buffer bloat" is unfortunately still a very real problem.
  22. *
  23. * The basic principle is TCP Congestion Control (RFC 5681), with the
  24. * addition of using the TCP Vegas algorithm. The reason we use Vegas
  25. * is that we run on top of a reliable transport so we need a latency
  26. * based algorithm rather than a loss based one. There is also a lot of
  27. * interpolation of values. This is because we have rather horrible
  28. * granularity in our measurements.
  29. *
  30. * We use a simplistic form of slow start in order to ramp up quickly
  31. * from an idle state. We do not have any persistent threshold though
  32. * as we have too much noise for it to be reliable.
  33. */
  34. #ifdef HAVE_CONFIG_H
  35. #include <config.h>
  36. #endif
  37. #include <assert.h>
  38. #include <string.h>
  39. #include <sys/time.h>
  40. #ifdef __linux__
  41. #include <sys/ioctl.h>
  42. #include <sys/socket.h>
  43. #include <netinet/in.h>
  44. #include <netinet/tcp.h>
  45. #include <linux/sockios.h>
  46. #endif
  47. #include <rfb/Congestion.h>
  48. #include <rfb/LogWriter.h>
  49. #include <rfb/util.h>
  50. // Debug output on what the congestion control is up to
  51. #undef CONGESTION_DEBUG
  52. // Dump socket congestion window debug trace to disk
  53. #undef CONGESTION_TRACE
  54. using namespace rfb;
  // Starting congestion window (16 KiB). This should get us going fairly
  // fast on a decent bandwidth network. If it turns out to be too high,
  // it will rapidly be reduced and stay low.
  static const unsigned INITIAL_WINDOW = 16 * 1024;

  // Lower bound for the window (4 KiB). TCP's minimal window is 3*MSS,
  // but the MSS is not known here, so this is a guess (the real value
  // is probably a bit higher).
  static const unsigned MINIMUM_WINDOW = 4 * 1024;

  // Upper bound for the window (4 MiB), which is the current default
  // maximum window for Linux. Should be a good limit for now...
  static const unsigned MAXIMUM_WINDOW = 4 * 1024 * 1024;
  // Wrap-around aware ordering of two stream positions.
  //
  // Returns true when |a| is ahead of |b| by at least one and at most
  // UINT_MAX / 2 steps in modular (unsigned) arithmetic. Equal positions
  // are never "after" each other.
  static inline bool isAfter(unsigned a, unsigned b) {
    const unsigned distance = a - b;

    if (distance == 0)
      return false;

    return distance <= UINT_MAX / 2;
  }
  68. static LogWriter vlog("Congestion");
  69. Congestion::Congestion() :
  70. lastPosition(0), extraBuffer(0),
  71. baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
  72. safeBaseRTT(-1), measurements(0), minRTT(-1), minCongestedRTT(-1)
  73. {
  74. gettimeofday(&lastUpdate, NULL);
  75. gettimeofday(&lastSent, NULL);
  76. memset(&lastPong, 0, sizeof(lastPong));
  77. gettimeofday(&lastPongArrival, NULL);
  78. gettimeofday(&lastAdjustment, NULL);
  79. }
  80. Congestion::~Congestion()
  81. {
  82. }
  83. void Congestion::updatePosition(unsigned pos)
  84. {
  85. struct timeval now;
  86. unsigned delta, consumed;
  87. gettimeofday(&now, NULL);
  88. delta = pos - lastPosition;
  89. if ((delta > 0) || (extraBuffer > 0))
  90. lastSent = now;
  91. // Idle for too long?
  92. // We use a very crude RTO calculation in order to keep things simple
  93. // FIXME: should implement RFC 2861
  94. if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
  95. #ifdef CONGESTION_DEBUG
  96. vlog.debug("Connection idle for %d ms, resetting congestion control",
  97. msBetween(&lastSent, &now));
  98. #endif
  99. // Close congestion window and redo wire latency measurement
  100. congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
  101. baseRTT = -1;
  102. measurements = 0;
  103. gettimeofday(&lastAdjustment, NULL);
  104. minRTT = minCongestedRTT = -1;
  105. inSlowStart = true;
  106. }
  107. // Commonly we will be in a state of overbuffering. We need to
  108. // estimate the extra delay that causes so we can separate it from
  109. // the delay caused by an incorrect congestion window.
  110. // (we cannot do this until we have a RTT measurement though)
  111. if (baseRTT != (unsigned)-1) {
  112. extraBuffer += delta;
  113. consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
  114. if (extraBuffer < consumed)
  115. extraBuffer = 0;
  116. else
  117. extraBuffer -= consumed;
  118. }
  119. lastPosition = pos;
  120. lastUpdate = now;
  121. }
  122. void Congestion::sentPing()
  123. {
  124. struct RTTInfo rttInfo;
  125. memset(&rttInfo, 0, sizeof(struct RTTInfo));
  126. gettimeofday(&rttInfo.tv, NULL);
  127. rttInfo.pos = lastPosition;
  128. rttInfo.extra = getExtraBuffer();
  129. rttInfo.congested = isCongested();
  130. pings.push_back(rttInfo);
  131. }
  132. void Congestion::gotPong()
  133. {
  134. struct timeval now;
  135. struct RTTInfo rttInfo;
  136. unsigned rtt, delay;
  137. if (pings.empty())
  138. return;
  139. gettimeofday(&now, NULL);
  140. rttInfo = pings.front();
  141. pings.pop_front();
  142. lastPong = rttInfo;
  143. lastPongArrival = now;
  144. rtt = msBetween(&rttInfo.tv, &now);
  145. if (rtt < 1)
  146. rtt = 1;
  147. // Try to estimate wire latency by tracking lowest seen latency
  148. if (rtt < baseRTT)
  149. safeBaseRTT = baseRTT = rtt;
  150. // Pings sent before the last adjustment aren't interesting as they
  151. // aren't a measurement of the current congestion window
  152. if (isBefore(&rttInfo.tv, &lastAdjustment))
  153. return;
  154. // Estimate added delay because of overtaxed buffers (see above)
  155. delay = rttInfo.extra * baseRTT / congWindow;
  156. if (delay < rtt)
  157. rtt -= delay;
  158. else
  159. rtt = 1;
  160. // A latency less than the wire latency means that we've
  161. // understimated the congestion window. We can't really determine
  162. // how much, so pretend that we got no buffer latency at all.
  163. if (rtt < baseRTT)
  164. rtt = baseRTT;
  165. // Record the minimum seen delay (hopefully ignores jitter) and let
  166. // the congestion control do its thing.
  167. //
  168. // Note: We are delay based rather than loss based, which means we
  169. // need to look at pongs even if they weren't limited by the
  170. // current window ("congested"). Otherwise we will fail to
  171. // detect increasing congestion until the application exceeds
  172. // the congestion window.
  173. if (rtt < minRTT)
  174. minRTT = rtt;
  175. if (rttInfo.congested) {
  176. if (rtt < minCongestedRTT)
  177. minCongestedRTT = rtt;
  178. }
  179. measurements++;
  180. updateCongestion();
  181. }
  182. bool Congestion::isCongested()
  183. {
  184. if (getInFlight() < congWindow)
  185. return false;
  186. return true;
  187. }
  188. int Congestion::getUncongestedETA()
  189. {
  190. unsigned targetAcked;
  191. const struct RTTInfo* prevPing;
  192. unsigned eta, elapsed;
  193. unsigned etaNext, delay;
  194. std::list<struct RTTInfo>::const_iterator iter;
  195. targetAcked = lastPosition - congWindow;
  196. // Simple case?
  197. if (isAfter(lastPong.pos, targetAcked))
  198. return 0;
  199. // No measurements yet?
  200. if (baseRTT == (unsigned)-1)
  201. return -1;
  202. prevPing = &lastPong;
  203. eta = 0;
  204. elapsed = msSince(&lastPongArrival);
  205. // Walk the ping queue and figure out which one we are waiting for to
  206. // get to an uncongested state
  207. for (iter = pings.begin(); ;++iter) {
  208. struct RTTInfo curPing;
  209. // If we aren't waiting for a pong that will clear the congested
  210. // state then we have to estimate the final bit by pretending that
  211. // we had a ping just after the last position update.
  212. if (iter == pings.end()) {
  213. curPing.tv = lastUpdate;
  214. curPing.pos = lastPosition;
  215. curPing.extra = extraBuffer;
  216. } else {
  217. curPing = *iter;
  218. }
  219. etaNext = msBetween(&prevPing->tv, &curPing.tv);
  220. // Compensate for buffering delays
  221. delay = curPing.extra * baseRTT / congWindow;
  222. etaNext += delay;
  223. delay = prevPing->extra * baseRTT / congWindow;
  224. if (delay >= etaNext)
  225. etaNext = 0;
  226. else
  227. etaNext -= delay;
  228. // Found it?
  229. if (isAfter(curPing.pos, targetAcked)) {
  230. eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
  231. if (elapsed > eta)
  232. return 0;
  233. else
  234. return eta - elapsed;
  235. }
  236. assert(iter != pings.end());
  237. eta += etaNext;
  238. prevPing = &*iter;
  239. }
  240. }
  241. size_t Congestion::getBandwidth()
  242. {
  243. size_t bandwidth;
  244. // No measurements yet? Guess RTT of 60 ms
  245. if (safeBaseRTT == (unsigned)-1)
  246. bandwidth = congWindow * 1000 / 60;
  247. else
  248. bandwidth = congWindow * 1000 / safeBaseRTT;
  249. // We're still probing so guess actual bandwidth is halfway between
  250. // the current guess and the next one (slow start doubles each time)
  251. if (inSlowStart)
  252. bandwidth = bandwidth + bandwidth / 2;
  253. return bandwidth;
  254. }
  255. void Congestion::debugTrace(const char* filename, int fd)
  256. {
  257. (void)filename;
  258. (void)fd;
  259. #ifdef CONGESTION_TRACE
  260. #ifdef __linux__
  261. FILE *f;
  262. f = fopen(filename, "ab");
  263. if (f != NULL) {
  264. struct tcp_info info;
  265. int buffered;
  266. socklen_t len;
  267. len = sizeof(info);
  268. if ((getsockopt(fd, IPPROTO_TCP,
  269. TCP_INFO, &info, &len) == 0) &&
  270. (ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
  271. struct timeval now;
  272. gettimeofday(&now, NULL);
  273. fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
  274. (unsigned)now.tv_sec, (unsigned)now.tv_usec,
  275. congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
  276. getInFlight(), buffered);
  277. }
  278. fclose(f);
  279. }
  280. #endif
  281. #endif
  282. }
  283. unsigned Congestion::getExtraBuffer()
  284. {
  285. unsigned elapsed;
  286. unsigned consumed;
  287. if (baseRTT == (unsigned)-1)
  288. return 0;
  289. elapsed = msSince(&lastUpdate);
  290. consumed = elapsed * congWindow / baseRTT;
  291. if (consumed >= extraBuffer)
  292. return 0;
  293. else
  294. return extraBuffer - consumed;
  295. }
  296. unsigned Congestion::getInFlight()
  297. {
  298. struct RTTInfo nextPong;
  299. unsigned etaNext, delay, elapsed, acked;
  300. // Simple case?
  301. if (lastPosition == lastPong.pos)
  302. return 0;
  303. // No measurements yet?
  304. if (baseRTT == (unsigned)-1) {
  305. if (!pings.empty())
  306. return lastPosition - pings.front().pos;
  307. return 0;
  308. }
  309. // If we aren't waiting for any pong then we have to estimate things
  310. // by pretending that we had a ping just after the last position
  311. // update.
  312. if (pings.empty()) {
  313. nextPong.tv = lastUpdate;
  314. nextPong.pos = lastPosition;
  315. nextPong.extra = extraBuffer;
  316. } else {
  317. nextPong = pings.front();
  318. }
  319. // First we need to estimate how many bytes have made it through
  320. // completely. Look at the next ping that should arrive and figure
  321. // out how far behind it should be and interpolate the positions.
  322. etaNext = msBetween(&lastPong.tv, &nextPong.tv);
  323. // Compensate for buffering delays
  324. delay = nextPong.extra * baseRTT / congWindow;
  325. etaNext += delay;
  326. delay = lastPong.extra * baseRTT / congWindow;
  327. if (delay >= etaNext)
  328. etaNext = 0;
  329. else
  330. etaNext -= delay;
  331. elapsed = msSince(&lastPongArrival);
  332. // The pong should be here any second. Be optimistic and assume
  333. // we can already use its value.
  334. if (etaNext <= elapsed)
  335. acked = nextPong.pos;
  336. else {
  337. acked = lastPong.pos;
  338. acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
  339. }
  340. return lastPosition - acked;
  341. }
  342. void Congestion::updateCongestion()
  343. {
  344. unsigned diff;
  345. // We want at least three measurements to avoid noise
  346. if (measurements < 3)
  347. return;
  348. assert(minRTT >= baseRTT);
  349. assert(minCongestedRTT >= baseRTT);
  350. // The goal is to have a slightly too large congestion window since
  351. // a "perfect" one cannot be distinguished from a too small one. This
  352. // translates to a goal of a few extra milliseconds of delay.
  353. diff = minRTT - baseRTT;
  354. if (diff > __rfbmax(100, baseRTT/2)) {
  355. // We have no way of detecting loss, so assume massive latency
  356. // spike means packet loss. Adjust the window and go directly
  357. // to congestion avoidance.
  358. #ifdef CONGESTION_DEBUG
  359. vlog.debug("Latency spike! Backing off...");
  360. #endif
  361. congWindow = congWindow * baseRTT / minRTT;
  362. inSlowStart = false;
  363. }
  364. if (inSlowStart) {
  365. // Slow start. Aggressive growth until we see congestion.
  366. if (diff > 25) {
  367. // If we see an increased latency then we assume we've hit the
  368. // limit and it's time to leave slow start and switch to
  369. // congestion avoidance
  370. congWindow = congWindow * baseRTT / minRTT;
  371. inSlowStart = false;
  372. } else {
  373. // It's not safe to increase unless we actually used the entire
  374. // congestion window, hence we look at minCongestedRTT and not
  375. // minRTT
  376. diff = minCongestedRTT - baseRTT;
  377. if (diff < 25)
  378. congWindow *= 2;
  379. }
  380. } else {
  381. // Congestion avoidance (VEGAS)
  382. if (diff > 50) {
  383. // Slightly too fast
  384. congWindow -= 4096;
  385. } else {
  386. // Only the "congested" pongs are checked to see if the
  387. // window is too small.
  388. diff = minCongestedRTT - baseRTT;
  389. if (diff < 5) {
  390. // Way too slow
  391. congWindow += 8192;
  392. } else if (diff < 25) {
  393. // Too slow
  394. congWindow += 4096;
  395. }
  396. }
  397. }
  398. if (congWindow < MINIMUM_WINDOW)
  399. congWindow = MINIMUM_WINDOW;
  400. if (congWindow > MAXIMUM_WINDOW)
  401. congWindow = MAXIMUM_WINDOW;
  402. #ifdef CONGESTION_DEBUG
  403. vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
  404. minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
  405. congWindow * 8.0 / baseRTT / 1000.0,
  406. inSlowStart ? " (slow start)" : "");
  407. #endif
  408. measurements = 0;
  409. gettimeofday(&lastAdjustment, NULL);
  410. minRTT = minCongestedRTT = -1;
  411. }