You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Congestion.cxx 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. /* Copyright 2009-2018 Pierre Ossman for Cendio AB
  2. *
  3. * This is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This software is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License
  14. * along with this software; if not, write to the Free Software
  15. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  16. * USA.
  17. */
  18. /*
  19. * This code implements congestion control in the same way as TCP in
  20. * order to avoid excessive latency in the transport. This is needed
  21. * because "buffer bloat" is unfortunately still a very real problem.
  22. *
  23. * The basic principle is TCP Congestion Control (RFC 5681), with the
  24. * addition of using the TCP Vegas algorithm. The reason we use Vegas
  25. * is that we run on top of a reliable transport so we need a latency
  26. * based algorithm rather than a loss based one. There is also a lot of
  27. * interpolation of values. This is because we have rather horrible
  28. * granularity in our measurements.
  29. *
  30. * We use a simplistic form of slow start in order to ramp up quickly
  31. * from an idle state. We do not have any persistent threshold though
  32. * as we have too much noise for it to be reliable.
  33. */
  34. #ifdef HAVE_CONFIG_H
  35. #include <config.h>
  36. #endif
  37. #include <assert.h>
  38. #include <sys/time.h>
  39. #ifdef __linux__
  40. #include <sys/ioctl.h>
  41. #include <sys/socket.h>
  42. #include <netinet/in.h>
  43. #include <netinet/tcp.h>
  44. #include <linux/sockios.h>
  45. #endif
  46. #include <rfb/Congestion.h>
  47. #include <rfb/LogWriter.h>
  48. #include <rfb/util.h>
  49. // Debug output on what the congestion control is up to
  50. #undef CONGESTION_DEBUG
  51. // Dump socket congestion window debug trace to disk
  52. #undef CONGESTION_TRACE
  53. using namespace rfb;
  54. // This window should get us going fairly fast on a decent bandwidth network.
  55. // If it's too high, it will rapidly be reduced and stay low.
  56. static const unsigned INITIAL_WINDOW = 16384;
  57. // TCP's minimal window is 3*MSS. But since we don't know the MSS, we
  58. // make a guess at 4 KiB (it's probably a bit higher).
  59. static const unsigned MINIMUM_WINDOW = 4096;
  60. // The current default maximum window for Linux (4 MiB). Should be a good
  61. // limit for now...
  62. static const unsigned MAXIMUM_WINDOW = 4194304;
  63. // Compare position even when wrapped around
  64. static inline bool isAfter(unsigned a, unsigned b) {
  65. return a != b && a - b <= UINT_MAX / 2;
  66. }
// File-local log writer named "Congestion"; the CONGESTION_DEBUG
// diagnostics in this file are emitted through it.
static LogWriter vlog("Congestion");
  68. Congestion::Congestion() :
  69. lastPosition(0), extraBuffer(0),
  70. baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
  71. safeBaseRTT(-1), measurements(0), minRTT(-1), minCongestedRTT(-1)
  72. {
  73. gettimeofday(&lastUpdate, NULL);
  74. gettimeofday(&lastSent, NULL);
  75. memset(&lastPong, 0, sizeof(lastPong));
  76. gettimeofday(&lastPongArrival, NULL);
  77. gettimeofday(&lastAdjustment, NULL);
  78. }
  79. Congestion::~Congestion()
  80. {
  81. }
  82. void Congestion::updatePosition(unsigned pos)
  83. {
  84. struct timeval now;
  85. unsigned delta, consumed;
  86. gettimeofday(&now, NULL);
  87. delta = pos - lastPosition;
  88. if ((delta > 0) || (extraBuffer > 0))
  89. lastSent = now;
  90. // Idle for too long?
  91. // We use a very crude RTO calculation in order to keep things simple
  92. // FIXME: should implement RFC 2861
  93. if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
  94. #ifdef CONGESTION_DEBUG
  95. vlog.debug("Connection idle for %d ms, resetting congestion control",
  96. msBetween(&lastSent, &now));
  97. #endif
  98. // Close congestion window and redo wire latency measurement
  99. congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
  100. baseRTT = -1;
  101. measurements = 0;
  102. gettimeofday(&lastAdjustment, NULL);
  103. minRTT = minCongestedRTT = -1;
  104. inSlowStart = true;
  105. }
  106. // Commonly we will be in a state of overbuffering. We need to
  107. // estimate the extra delay that causes so we can separate it from
  108. // the delay caused by an incorrect congestion window.
  109. // (we cannot do this until we have a RTT measurement though)
  110. if (baseRTT != (unsigned)-1) {
  111. extraBuffer += delta;
  112. consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
  113. if (extraBuffer < consumed)
  114. extraBuffer = 0;
  115. else
  116. extraBuffer -= consumed;
  117. }
  118. lastPosition = pos;
  119. lastUpdate = now;
  120. }
  121. void Congestion::sentPing()
  122. {
  123. struct RTTInfo rttInfo;
  124. memset(&rttInfo, 0, sizeof(struct RTTInfo));
  125. gettimeofday(&rttInfo.tv, NULL);
  126. rttInfo.pos = lastPosition;
  127. rttInfo.extra = getExtraBuffer();
  128. rttInfo.congested = isCongested();
  129. pings.push_back(rttInfo);
  130. }
  131. void Congestion::gotPong()
  132. {
  133. struct timeval now;
  134. struct RTTInfo rttInfo;
  135. unsigned rtt, delay;
  136. if (pings.empty())
  137. return;
  138. gettimeofday(&now, NULL);
  139. rttInfo = pings.front();
  140. pings.pop_front();
  141. lastPong = rttInfo;
  142. lastPongArrival = now;
  143. rtt = msBetween(&rttInfo.tv, &now);
  144. if (rtt < 1)
  145. rtt = 1;
  146. // Try to estimate wire latency by tracking lowest seen latency
  147. if (rtt < baseRTT)
  148. safeBaseRTT = baseRTT = rtt;
  149. // Pings sent before the last adjustment aren't interesting as they
  150. // aren't a measurement of the current congestion window
  151. if (isBefore(&rttInfo.tv, &lastAdjustment))
  152. return;
  153. // Estimate added delay because of overtaxed buffers (see above)
  154. delay = rttInfo.extra * baseRTT / congWindow;
  155. if (delay < rtt)
  156. rtt -= delay;
  157. else
  158. rtt = 1;
  159. // A latency less than the wire latency means that we've
  160. // understimated the congestion window. We can't really determine
  161. // how much, so pretend that we got no buffer latency at all.
  162. if (rtt < baseRTT)
  163. rtt = baseRTT;
  164. // Record the minimum seen delay (hopefully ignores jitter) and let
  165. // the congestion control do its thing.
  166. //
  167. // Note: We are delay based rather than loss based, which means we
  168. // need to look at pongs even if they weren't limited by the
  169. // current window ("congested"). Otherwise we will fail to
  170. // detect increasing congestion until the application exceeds
  171. // the congestion window.
  172. if (rtt < minRTT)
  173. minRTT = rtt;
  174. if (rttInfo.congested) {
  175. if (rtt < minCongestedRTT)
  176. minCongestedRTT = rtt;
  177. }
  178. measurements++;
  179. updateCongestion();
  180. }
  181. bool Congestion::isCongested()
  182. {
  183. if (getInFlight() < congWindow)
  184. return false;
  185. return true;
  186. }
  187. int Congestion::getUncongestedETA()
  188. {
  189. unsigned targetAcked;
  190. const struct RTTInfo* prevPing;
  191. unsigned eta, elapsed;
  192. unsigned etaNext, delay;
  193. std::list<struct RTTInfo>::const_iterator iter;
  194. targetAcked = lastPosition - congWindow;
  195. // Simple case?
  196. if (isAfter(lastPong.pos, targetAcked))
  197. return 0;
  198. // No measurements yet?
  199. if (baseRTT == (unsigned)-1)
  200. return -1;
  201. prevPing = &lastPong;
  202. eta = 0;
  203. elapsed = msSince(&lastPongArrival);
  204. // Walk the ping queue and figure out which one we are waiting for to
  205. // get to an uncongested state
  206. for (iter = pings.begin(); ;++iter) {
  207. struct RTTInfo curPing;
  208. // If we aren't waiting for a pong that will clear the congested
  209. // state then we have to estimate the final bit by pretending that
  210. // we had a ping just after the last position update.
  211. if (iter == pings.end()) {
  212. curPing.tv = lastUpdate;
  213. curPing.pos = lastPosition;
  214. curPing.extra = extraBuffer;
  215. } else {
  216. curPing = *iter;
  217. }
  218. etaNext = msBetween(&prevPing->tv, &curPing.tv);
  219. // Compensate for buffering delays
  220. delay = curPing.extra * baseRTT / congWindow;
  221. etaNext += delay;
  222. delay = prevPing->extra * baseRTT / congWindow;
  223. if (delay >= etaNext)
  224. etaNext = 0;
  225. else
  226. etaNext -= delay;
  227. // Found it?
  228. if (isAfter(curPing.pos, targetAcked)) {
  229. eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
  230. if (elapsed > eta)
  231. return 0;
  232. else
  233. return eta - elapsed;
  234. }
  235. assert(iter != pings.end());
  236. eta += etaNext;
  237. prevPing = &*iter;
  238. }
  239. }
  240. size_t Congestion::getBandwidth()
  241. {
  242. size_t bandwidth;
  243. // No measurements yet? Guess RTT of 60 ms
  244. if (safeBaseRTT == (unsigned)-1)
  245. bandwidth = congWindow * 1000 / 60;
  246. else
  247. bandwidth = congWindow * 1000 / safeBaseRTT;
  248. // We're still probing so guess actual bandwidth is halfway between
  249. // the current guess and the next one (slow start doubles each time)
  250. if (inSlowStart)
  251. bandwidth = bandwidth + bandwidth / 2;
  252. return bandwidth;
  253. }
  254. void Congestion::debugTrace(const char* filename, int fd)
  255. {
  256. #ifdef CONGESTION_TRACE
  257. #ifdef __linux__
  258. FILE *f;
  259. f = fopen(filename, "ab");
  260. if (f != NULL) {
  261. struct tcp_info info;
  262. int buffered;
  263. socklen_t len;
  264. len = sizeof(info);
  265. if ((getsockopt(fd, IPPROTO_TCP,
  266. TCP_INFO, &info, &len) == 0) &&
  267. (ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
  268. struct timeval now;
  269. gettimeofday(&now, NULL);
  270. fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
  271. (unsigned)now.tv_sec, (unsigned)now.tv_usec,
  272. congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
  273. getInFlight(), buffered);
  274. }
  275. fclose(f);
  276. }
  277. #endif
  278. #endif
  279. }
  280. unsigned Congestion::getExtraBuffer()
  281. {
  282. unsigned elapsed;
  283. unsigned consumed;
  284. if (baseRTT == (unsigned)-1)
  285. return 0;
  286. elapsed = msSince(&lastUpdate);
  287. consumed = elapsed * congWindow / baseRTT;
  288. if (consumed >= extraBuffer)
  289. return 0;
  290. else
  291. return extraBuffer - consumed;
  292. }
  293. unsigned Congestion::getInFlight()
  294. {
  295. struct RTTInfo nextPong;
  296. unsigned etaNext, delay, elapsed, acked;
  297. // Simple case?
  298. if (lastPosition == lastPong.pos)
  299. return 0;
  300. // No measurements yet?
  301. if (baseRTT == (unsigned)-1) {
  302. if (!pings.empty())
  303. return lastPosition - pings.front().pos;
  304. return 0;
  305. }
  306. // If we aren't waiting for any pong then we have to estimate things
  307. // by pretending that we had a ping just after the last position
  308. // update.
  309. if (pings.empty()) {
  310. nextPong.tv = lastUpdate;
  311. nextPong.pos = lastPosition;
  312. nextPong.extra = extraBuffer;
  313. } else {
  314. nextPong = pings.front();
  315. }
  316. // First we need to estimate how many bytes have made it through
  317. // completely. Look at the next ping that should arrive and figure
  318. // out how far behind it should be and interpolate the positions.
  319. etaNext = msBetween(&lastPong.tv, &nextPong.tv);
  320. // Compensate for buffering delays
  321. delay = nextPong.extra * baseRTT / congWindow;
  322. etaNext += delay;
  323. delay = lastPong.extra * baseRTT / congWindow;
  324. if (delay >= etaNext)
  325. etaNext = 0;
  326. else
  327. etaNext -= delay;
  328. elapsed = msSince(&lastPongArrival);
  329. // The pong should be here any second. Be optimistic and assume
  330. // we can already use its value.
  331. if (etaNext <= elapsed)
  332. acked = nextPong.pos;
  333. else {
  334. acked = lastPong.pos;
  335. acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
  336. }
  337. return lastPosition - acked;
  338. }
  339. void Congestion::updateCongestion()
  340. {
  341. unsigned diff;
  342. // We want at least three measurements to avoid noise
  343. if (measurements < 3)
  344. return;
  345. assert(minRTT >= baseRTT);
  346. assert(minCongestedRTT >= baseRTT);
  347. // The goal is to have a slightly too large congestion window since
  348. // a "perfect" one cannot be distinguished from a too small one. This
  349. // translates to a goal of a few extra milliseconds of delay.
  350. diff = minRTT - baseRTT;
  351. if (diff > __rfbmax(100, baseRTT/2)) {
  352. // We have no way of detecting loss, so assume massive latency
  353. // spike means packet loss. Adjust the window and go directly
  354. // to congestion avoidance.
  355. #ifdef CONGESTION_DEBUG
  356. vlog.debug("Latency spike! Backing off...");
  357. #endif
  358. congWindow = congWindow * baseRTT / minRTT;
  359. inSlowStart = false;
  360. }
  361. if (inSlowStart) {
  362. // Slow start. Aggressive growth until we see congestion.
  363. if (diff > 25) {
  364. // If we see an increased latency then we assume we've hit the
  365. // limit and it's time to leave slow start and switch to
  366. // congestion avoidance
  367. congWindow = congWindow * baseRTT / minRTT;
  368. inSlowStart = false;
  369. } else {
  370. // It's not safe to increase unless we actually used the entire
  371. // congestion window, hence we look at minCongestedRTT and not
  372. // minRTT
  373. diff = minCongestedRTT - baseRTT;
  374. if (diff < 25)
  375. congWindow *= 2;
  376. }
  377. } else {
  378. // Congestion avoidance (VEGAS)
  379. if (diff > 50) {
  380. // Slightly too fast
  381. congWindow -= 4096;
  382. } else {
  383. // Only the "congested" pongs are checked to see if the
  384. // window is too small.
  385. diff = minCongestedRTT - baseRTT;
  386. if (diff < 5) {
  387. // Way too slow
  388. congWindow += 8192;
  389. } else if (diff < 25) {
  390. // Too slow
  391. congWindow += 4096;
  392. }
  393. }
  394. }
  395. if (congWindow < MINIMUM_WINDOW)
  396. congWindow = MINIMUM_WINDOW;
  397. if (congWindow > MAXIMUM_WINDOW)
  398. congWindow = MAXIMUM_WINDOW;
  399. #ifdef CONGESTION_DEBUG
  400. vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
  401. minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
  402. congWindow * 8.0 / baseRTT / 1000.0,
  403. inSlowStart ? " (slow start)" : "");
  404. #endif
  405. measurements = 0;
  406. gettimeofday(&lastAdjustment, NULL);
  407. minRTT = minCongestedRTT = -1;
  408. }