You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Congestion.cxx 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. /* Copyright 2009-2018 Pierre Ossman for Cendio AB
  2. *
  3. * This is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This software is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License
  14. * along with this software; if not, write to the Free Software
  15. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  16. * USA.
  17. */
  18. /*
  19. * This code implements congestion control in the same way as TCP in
  20. * order to avoid excessive latency in the transport. This is needed
  21. * because "buffer bloat" is unfortunately still a very real problem.
  22. *
  23. * The basic principle is TCP Congestion Control (RFC 5681), with the
  24. * addition of using the TCP Vegas algorithm. The reason we use Vegas
  25. * is that we run on top of a reliable transport so we need a latency
  26. * based algorithm rather than a loss based one. There is also a lot of
  27. * interpolation of values. This is because we have rather horrible
  28. * granularity in our measurements.
  29. *
  30. * We use a simplistic form of slow start in order to ramp up quickly
  31. * from an idle state. We do not have any persistent threshold though
  32. * as we have too much noise for it to be reliable.
  33. */
  34. #ifdef HAVE_CONFIG_H
  35. #include <config.h>
  36. #endif
  37. #include <assert.h>
  38. #include <sys/time.h>
  39. #ifdef __linux__
  40. #include <sys/ioctl.h>
  41. #include <sys/socket.h>
  42. #include <netinet/in.h>
  43. #include <netinet/tcp.h>
  44. #include <linux/sockios.h>
  45. #endif
  46. #include <rfb/Congestion.h>
  47. #include <rfb/LogWriter.h>
  48. #include <rfb/util.h>
  49. // Debug output on what the congestion control is up to
  50. #undef CONGESTION_DEBUG
  51. // Dump socket congestion window debug trace to disk
  52. #undef CONGESTION_TRACE
  53. using namespace rfb;
  54. // This window should get us going fairly fast on a decent bandwidth network.
  55. // If it's too high, it will rapidly be reduced and stay low.
  56. static const unsigned INITIAL_WINDOW = 16384;
  57. // TCP's minimal window is 3*MSS. But since we don't know the MSS, we
  58. // make a guess at 4 KiB (it's probably a bit higher).
  59. static const unsigned MINIMUM_WINDOW = 4096;
  60. // The current default maximum window for Linux (4 MiB). Should be a good
  61. // limit for now...
  62. static const unsigned MAXIMUM_WINDOW = 4194304;
  63. // Compare position even when wrapped around
  64. static inline bool isAfter(unsigned a, unsigned b) {
  65. return a != b && a - b <= UINT_MAX / 2;
  66. }
  67. static LogWriter vlog("Congestion");
  68. Congestion::Congestion() :
  69. lastPosition(0), extraBuffer(0),
  70. baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
  71. safeBaseRTT(-1), measurements(0), minRTT(-1), minCongestedRTT(-1)
  72. {
  73. gettimeofday(&lastUpdate, NULL);
  74. gettimeofday(&lastSent, NULL);
  75. memset(&lastPong, 0, sizeof(lastPong));
  76. gettimeofday(&lastPongArrival, NULL);
  77. gettimeofday(&lastAdjustment, NULL);
  78. }
  79. Congestion::~Congestion()
  80. {
  81. }
  82. void Congestion::updatePosition(unsigned pos)
  83. {
  84. struct timeval now;
  85. unsigned delta, consumed;
  86. gettimeofday(&now, NULL);
  87. delta = pos - lastPosition;
  88. if ((delta > 0) || (extraBuffer > 0))
  89. lastSent = now;
  90. // Idle for too long?
  91. // We use a very crude RTO calculation in order to keep things simple
  92. // FIXME: should implement RFC 2861
  93. if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
  94. #ifdef CONGESTION_DEBUG
  95. vlog.debug("Connection idle for %d ms, resetting congestion control",
  96. msBetween(&lastSent, &now));
  97. #endif
  98. // Close congestion window and redo wire latency measurement
  99. congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
  100. baseRTT = -1;
  101. measurements = 0;
  102. gettimeofday(&lastAdjustment, NULL);
  103. minRTT = minCongestedRTT = -1;
  104. inSlowStart = true;
  105. }
  106. // Commonly we will be in a state of overbuffering. We need to
  107. // estimate the extra delay that causes so we can separate it from
  108. // the delay caused by an incorrect congestion window.
  109. // (we cannot do this until we have a RTT measurement though)
  110. if (baseRTT != (unsigned)-1) {
  111. extraBuffer += delta;
  112. consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
  113. if (extraBuffer < consumed)
  114. extraBuffer = 0;
  115. else
  116. extraBuffer -= consumed;
  117. }
  118. lastPosition = pos;
  119. lastUpdate = now;
  120. }
  121. void Congestion::sentPing()
  122. {
  123. struct RTTInfo rttInfo;
  124. memset(&rttInfo, 0, sizeof(struct RTTInfo));
  125. gettimeofday(&rttInfo.tv, NULL);
  126. rttInfo.pos = lastPosition;
  127. rttInfo.extra = getExtraBuffer();
  128. rttInfo.congested = isCongested();
  129. pings.push_back(rttInfo);
  130. }
  131. void Congestion::gotPong()
  132. {
  133. struct timeval now;
  134. struct RTTInfo rttInfo;
  135. unsigned rtt, delay;
  136. if (pings.empty())
  137. return;
  138. gettimeofday(&now, NULL);
  139. rttInfo = pings.front();
  140. pings.pop_front();
  141. lastPong = rttInfo;
  142. lastPongArrival = now;
  143. rtt = msBetween(&rttInfo.tv, &now);
  144. if (rtt < 1)
  145. rtt = 1;
  146. // Try to estimate wire latency by tracking lowest seen latency
  147. if (rtt < baseRTT)
  148. safeBaseRTT = baseRTT = rtt;
  149. // Pings sent before the last adjustment aren't interesting as they
  150. // aren't a measurement of the current congestion window
  151. if (isBefore(&rttInfo.tv, &lastAdjustment))
  152. return;
  153. // Estimate added delay because of overtaxed buffers (see above)
  154. delay = rttInfo.extra * baseRTT / congWindow;
  155. if (delay < rtt)
  156. rtt -= delay;
  157. else
  158. rtt = 1;
  159. // A latency less than the wire latency means that we've
  160. // understimated the congestion window. We can't really determine
  161. // how much, so pretend that we got no buffer latency at all.
  162. if (rtt < baseRTT)
  163. rtt = baseRTT;
  164. // Record the minimum seen delay (hopefully ignores jitter) and let
  165. // the congestion control do its thing.
  166. //
  167. // Note: We are delay based rather than loss based, which means we
  168. // need to look at pongs even if they weren't limited by the
  169. // current window ("congested"). Otherwise we will fail to
  170. // detect increasing congestion until the application exceeds
  171. // the congestion window.
  172. if (rtt < minRTT)
  173. minRTT = rtt;
  174. if (rttInfo.congested) {
  175. if (rtt < minCongestedRTT)
  176. minCongestedRTT = rtt;
  177. }
  178. measurements++;
  179. updateCongestion();
  180. }
  181. bool Congestion::isCongested()
  182. {
  183. if (getInFlight() < congWindow)
  184. return false;
  185. return true;
  186. }
  187. int Congestion::getUncongestedETA()
  188. {
  189. unsigned targetAcked;
  190. const struct RTTInfo* prevPing;
  191. unsigned eta, elapsed;
  192. unsigned etaNext, delay;
  193. std::list<struct RTTInfo>::const_iterator iter;
  194. targetAcked = lastPosition - congWindow;
  195. // Simple case?
  196. if (isAfter(lastPong.pos, targetAcked))
  197. return 0;
  198. // No measurements yet?
  199. if (baseRTT == (unsigned)-1)
  200. return -1;
  201. prevPing = &lastPong;
  202. eta = 0;
  203. elapsed = msSince(&lastPongArrival);
  204. // Walk the ping queue and figure out which one we are waiting for to
  205. // get to an uncongested state
  206. for (iter = pings.begin(); ;++iter) {
  207. struct RTTInfo curPing;
  208. // If we aren't waiting for a pong that will clear the congested
  209. // state then we have to estimate the final bit by pretending that
  210. // we had a ping just after the last position update.
  211. if (iter == pings.end()) {
  212. curPing.tv = lastUpdate;
  213. curPing.pos = lastPosition;
  214. curPing.extra = extraBuffer;
  215. } else {
  216. curPing = *iter;
  217. }
  218. etaNext = msBetween(&prevPing->tv, &curPing.tv);
  219. // Compensate for buffering delays
  220. delay = curPing.extra * baseRTT / congWindow;
  221. etaNext += delay;
  222. delay = prevPing->extra * baseRTT / congWindow;
  223. if (delay >= etaNext)
  224. etaNext = 0;
  225. else
  226. etaNext -= delay;
  227. // Found it?
  228. if (isAfter(curPing.pos, targetAcked)) {
  229. eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
  230. if (elapsed > eta)
  231. return 0;
  232. else
  233. return eta - elapsed;
  234. }
  235. assert(iter != pings.end());
  236. eta += etaNext;
  237. prevPing = &*iter;
  238. }
  239. }
  240. size_t Congestion::getBandwidth()
  241. {
  242. size_t bandwidth;
  243. // No measurements yet? Guess RTT of 60 ms
  244. if (safeBaseRTT == (unsigned)-1)
  245. bandwidth = congWindow * 1000 / 60;
  246. else
  247. bandwidth = congWindow * 1000 / safeBaseRTT;
  248. // We're still probing so guess actual bandwidth is halfway between
  249. // the current guess and the next one (slow start doubles each time)
  250. if (inSlowStart)
  251. bandwidth = bandwidth + bandwidth / 2;
  252. return bandwidth;
  253. }
  254. void Congestion::debugTrace(const char* filename, int fd)
  255. {
  256. (void)filename;
  257. (void)fd;
  258. #ifdef CONGESTION_TRACE
  259. #ifdef __linux__
  260. FILE *f;
  261. f = fopen(filename, "ab");
  262. if (f != NULL) {
  263. struct tcp_info info;
  264. int buffered;
  265. socklen_t len;
  266. len = sizeof(info);
  267. if ((getsockopt(fd, IPPROTO_TCP,
  268. TCP_INFO, &info, &len) == 0) &&
  269. (ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
  270. struct timeval now;
  271. gettimeofday(&now, NULL);
  272. fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
  273. (unsigned)now.tv_sec, (unsigned)now.tv_usec,
  274. congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
  275. getInFlight(), buffered);
  276. }
  277. fclose(f);
  278. }
  279. #endif
  280. #endif
  281. }
  282. unsigned Congestion::getExtraBuffer()
  283. {
  284. unsigned elapsed;
  285. unsigned consumed;
  286. if (baseRTT == (unsigned)-1)
  287. return 0;
  288. elapsed = msSince(&lastUpdate);
  289. consumed = elapsed * congWindow / baseRTT;
  290. if (consumed >= extraBuffer)
  291. return 0;
  292. else
  293. return extraBuffer - consumed;
  294. }
  295. unsigned Congestion::getInFlight()
  296. {
  297. struct RTTInfo nextPong;
  298. unsigned etaNext, delay, elapsed, acked;
  299. // Simple case?
  300. if (lastPosition == lastPong.pos)
  301. return 0;
  302. // No measurements yet?
  303. if (baseRTT == (unsigned)-1) {
  304. if (!pings.empty())
  305. return lastPosition - pings.front().pos;
  306. return 0;
  307. }
  308. // If we aren't waiting for any pong then we have to estimate things
  309. // by pretending that we had a ping just after the last position
  310. // update.
  311. if (pings.empty()) {
  312. nextPong.tv = lastUpdate;
  313. nextPong.pos = lastPosition;
  314. nextPong.extra = extraBuffer;
  315. } else {
  316. nextPong = pings.front();
  317. }
  318. // First we need to estimate how many bytes have made it through
  319. // completely. Look at the next ping that should arrive and figure
  320. // out how far behind it should be and interpolate the positions.
  321. etaNext = msBetween(&lastPong.tv, &nextPong.tv);
  322. // Compensate for buffering delays
  323. delay = nextPong.extra * baseRTT / congWindow;
  324. etaNext += delay;
  325. delay = lastPong.extra * baseRTT / congWindow;
  326. if (delay >= etaNext)
  327. etaNext = 0;
  328. else
  329. etaNext -= delay;
  330. elapsed = msSince(&lastPongArrival);
  331. // The pong should be here any second. Be optimistic and assume
  332. // we can already use its value.
  333. if (etaNext <= elapsed)
  334. acked = nextPong.pos;
  335. else {
  336. acked = lastPong.pos;
  337. acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
  338. }
  339. return lastPosition - acked;
  340. }
  341. void Congestion::updateCongestion()
  342. {
  343. unsigned diff;
  344. // We want at least three measurements to avoid noise
  345. if (measurements < 3)
  346. return;
  347. assert(minRTT >= baseRTT);
  348. assert(minCongestedRTT >= baseRTT);
  349. // The goal is to have a slightly too large congestion window since
  350. // a "perfect" one cannot be distinguished from a too small one. This
  351. // translates to a goal of a few extra milliseconds of delay.
  352. diff = minRTT - baseRTT;
  353. if (diff > __rfbmax(100, baseRTT/2)) {
  354. // We have no way of detecting loss, so assume massive latency
  355. // spike means packet loss. Adjust the window and go directly
  356. // to congestion avoidance.
  357. #ifdef CONGESTION_DEBUG
  358. vlog.debug("Latency spike! Backing off...");
  359. #endif
  360. congWindow = congWindow * baseRTT / minRTT;
  361. inSlowStart = false;
  362. }
  363. if (inSlowStart) {
  364. // Slow start. Aggressive growth until we see congestion.
  365. if (diff > 25) {
  366. // If we see an increased latency then we assume we've hit the
  367. // limit and it's time to leave slow start and switch to
  368. // congestion avoidance
  369. congWindow = congWindow * baseRTT / minRTT;
  370. inSlowStart = false;
  371. } else {
  372. // It's not safe to increase unless we actually used the entire
  373. // congestion window, hence we look at minCongestedRTT and not
  374. // minRTT
  375. diff = minCongestedRTT - baseRTT;
  376. if (diff < 25)
  377. congWindow *= 2;
  378. }
  379. } else {
  380. // Congestion avoidance (VEGAS)
  381. if (diff > 50) {
  382. // Slightly too fast
  383. congWindow -= 4096;
  384. } else {
  385. // Only the "congested" pongs are checked to see if the
  386. // window is too small.
  387. diff = minCongestedRTT - baseRTT;
  388. if (diff < 5) {
  389. // Way too slow
  390. congWindow += 8192;
  391. } else if (diff < 25) {
  392. // Too slow
  393. congWindow += 4096;
  394. }
  395. }
  396. }
  397. if (congWindow < MINIMUM_WINDOW)
  398. congWindow = MINIMUM_WINDOW;
  399. if (congWindow > MAXIMUM_WINDOW)
  400. congWindow = MAXIMUM_WINDOW;
  401. #ifdef CONGESTION_DEBUG
  402. vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
  403. minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
  404. congWindow * 8.0 / baseRTT / 1000.0,
  405. inSlowStart ? " (slow start)" : "");
  406. #endif
  407. measurements = 0;
  408. gettimeofday(&lastAdjustment, NULL);
  409. minRTT = minCongestedRTT = -1;
  410. }