You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Congestion.cxx 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. /* Copyright 2009-2018 Pierre Ossman for Cendio AB
  2. *
  3. * This is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This software is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License
  14. * along with this software; if not, write to the Free Software
  15. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  16. * USA.
  17. */
  18. /*
  19. * This code implements congestion control in the same way as TCP in
  20. * order to avoid excessive latency in the transport. This is needed
  21. * because "buffer bloat" is unfortunately still a very real problem.
  22. *
  23. * The basic principle is TCP Congestion Control (RFC 5681), with the
  24. * addition of using the TCP Vegas algorithm. The reason we use Vegas
  25. * is that we run on top of a reliable transport so we need a latency
  26. * based algorithm rather than a loss based one. There is also a lot of
  27. * interpolation of values. This is because we have rather horrible
  28. * granularity in our measurements.
  29. *
  30. * We use a simplistic form of slow start in order to ramp up quickly
  31. * from an idle state. We do not have any persistent threshold though
  32. * as we have too much noise for it to be reliable.
  33. */
  34. #include <assert.h>
  35. #include <sys/time.h>
  36. #ifdef __linux__
  37. #include <sys/ioctl.h>
  38. #include <sys/socket.h>
  39. #include <netinet/in.h>
  40. #include <netinet/tcp.h>
  41. #include <linux/sockios.h>
  42. #endif
  43. #include <rfb/Congestion.h>
  44. #include <rfb/LogWriter.h>
  45. #include <rfb/util.h>
  46. // Debug output on what the congestion control is up to
  47. #undef CONGESTION_DEBUG
  48. // Dump socket congestion window debug trace to disk
  49. #undef CONGESTION_TRACE
  50. using namespace rfb;
  51. // This window should get us going fairly fast on a decent bandwidth network.
  52. // If it's too high, it will rapidly be reduced and stay low.
  53. static const unsigned INITIAL_WINDOW = 16384;
  54. // TCP's minimal window is 3*MSS. But since we don't know the MSS, we
  55. // make a guess at 4 KiB (it's probably a bit higher).
  56. static const unsigned MINIMUM_WINDOW = 4096;
  57. // The current default maximum window for Linux (4 MiB). Should be a good
  58. // limit for now...
  59. static const unsigned MAXIMUM_WINDOW = 4194304;
  60. // Compare position even when wrapped around
  61. static inline bool isAfter(unsigned a, unsigned b) {
  62. return a != b && a - b <= UINT_MAX / 2;
  63. }
  64. static LogWriter vlog("Congestion");
  65. Congestion::Congestion() :
  66. lastPosition(0), extraBuffer(0),
  67. baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
  68. safeBaseRTT(-1), measurements(0), minRTT(-1), minCongestedRTT(-1)
  69. {
  70. gettimeofday(&lastUpdate, NULL);
  71. gettimeofday(&lastSent, NULL);
  72. memset(&lastPong, 0, sizeof(lastPong));
  73. gettimeofday(&lastPongArrival, NULL);
  74. gettimeofday(&lastAdjustment, NULL);
  75. }
  76. Congestion::~Congestion()
  77. {
  78. }
  79. void Congestion::updatePosition(unsigned pos)
  80. {
  81. struct timeval now;
  82. unsigned delta, consumed;
  83. gettimeofday(&now, NULL);
  84. delta = pos - lastPosition;
  85. if ((delta > 0) || (extraBuffer > 0))
  86. lastSent = now;
  87. // Idle for too long?
  88. // We use a very crude RTO calculation in order to keep things simple
  89. // FIXME: should implement RFC 2861
  90. if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
  91. #ifdef CONGESTION_DEBUG
  92. vlog.debug("Connection idle for %d ms, resetting congestion control",
  93. msBetween(&lastSent, &now));
  94. #endif
  95. // Close congestion window and redo wire latency measurement
  96. congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
  97. baseRTT = -1;
  98. measurements = 0;
  99. gettimeofday(&lastAdjustment, NULL);
  100. minRTT = minCongestedRTT = -1;
  101. inSlowStart = true;
  102. }
  103. // Commonly we will be in a state of overbuffering. We need to
  104. // estimate the extra delay that causes so we can separate it from
  105. // the delay caused by an incorrect congestion window.
  106. // (we cannot do this until we have a RTT measurement though)
  107. if (baseRTT != (unsigned)-1) {
  108. extraBuffer += delta;
  109. consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
  110. if (extraBuffer < consumed)
  111. extraBuffer = 0;
  112. else
  113. extraBuffer -= consumed;
  114. }
  115. lastPosition = pos;
  116. lastUpdate = now;
  117. }
  118. void Congestion::sentPing()
  119. {
  120. struct RTTInfo rttInfo;
  121. memset(&rttInfo, 0, sizeof(struct RTTInfo));
  122. gettimeofday(&rttInfo.tv, NULL);
  123. rttInfo.pos = lastPosition;
  124. rttInfo.extra = getExtraBuffer();
  125. rttInfo.congested = isCongested();
  126. pings.push_back(rttInfo);
  127. }
  128. void Congestion::gotPong()
  129. {
  130. struct timeval now;
  131. struct RTTInfo rttInfo;
  132. unsigned rtt, delay;
  133. if (pings.empty())
  134. return;
  135. gettimeofday(&now, NULL);
  136. rttInfo = pings.front();
  137. pings.pop_front();
  138. lastPong = rttInfo;
  139. lastPongArrival = now;
  140. rtt = msBetween(&rttInfo.tv, &now);
  141. if (rtt < 1)
  142. rtt = 1;
  143. // Try to estimate wire latency by tracking lowest seen latency
  144. if (rtt < baseRTT)
  145. safeBaseRTT = baseRTT = rtt;
  146. // Pings sent before the last adjustment aren't interesting as they
  147. // aren't a measurement of the current congestion window
  148. if (isBefore(&rttInfo.tv, &lastAdjustment))
  149. return;
  150. // Estimate added delay because of overtaxed buffers (see above)
  151. delay = rttInfo.extra * baseRTT / congWindow;
  152. if (delay < rtt)
  153. rtt -= delay;
  154. else
  155. rtt = 1;
  156. // A latency less than the wire latency means that we've
  157. // understimated the congestion window. We can't really determine
  158. // how much, so pretend that we got no buffer latency at all.
  159. if (rtt < baseRTT)
  160. rtt = baseRTT;
  161. // Record the minimum seen delay (hopefully ignores jitter) and let
  162. // the congestion control do its thing.
  163. //
  164. // Note: We are delay based rather than loss based, which means we
  165. // need to look at pongs even if they weren't limited by the
  166. // current window ("congested"). Otherwise we will fail to
  167. // detect increasing congestion until the application exceeds
  168. // the congestion window.
  169. if (rtt < minRTT)
  170. minRTT = rtt;
  171. if (rttInfo.congested) {
  172. if (rtt < minCongestedRTT)
  173. minCongestedRTT = rtt;
  174. }
  175. measurements++;
  176. updateCongestion();
  177. }
  178. bool Congestion::isCongested()
  179. {
  180. if (getInFlight() < congWindow)
  181. return false;
  182. return true;
  183. }
  184. int Congestion::getUncongestedETA()
  185. {
  186. unsigned targetAcked;
  187. const struct RTTInfo* prevPing;
  188. unsigned eta, elapsed;
  189. unsigned etaNext, delay;
  190. std::list<struct RTTInfo>::const_iterator iter;
  191. targetAcked = lastPosition - congWindow;
  192. // Simple case?
  193. if (isAfter(lastPong.pos, targetAcked))
  194. return 0;
  195. // No measurements yet?
  196. if (baseRTT == (unsigned)-1)
  197. return -1;
  198. prevPing = &lastPong;
  199. eta = 0;
  200. elapsed = msSince(&lastPongArrival);
  201. // Walk the ping queue and figure out which one we are waiting for to
  202. // get to an uncongested state
  203. for (iter = pings.begin(); ;++iter) {
  204. struct RTTInfo curPing;
  205. // If we aren't waiting for a pong that will clear the congested
  206. // state then we have to estimate the final bit by pretending that
  207. // we had a ping just after the last position update.
  208. if (iter == pings.end()) {
  209. curPing.tv = lastUpdate;
  210. curPing.pos = lastPosition;
  211. curPing.extra = extraBuffer;
  212. } else {
  213. curPing = *iter;
  214. }
  215. etaNext = msBetween(&prevPing->tv, &curPing.tv);
  216. // Compensate for buffering delays
  217. delay = curPing.extra * baseRTT / congWindow;
  218. etaNext += delay;
  219. delay = prevPing->extra * baseRTT / congWindow;
  220. if (delay >= etaNext)
  221. etaNext = 0;
  222. else
  223. etaNext -= delay;
  224. // Found it?
  225. if (isAfter(curPing.pos, targetAcked)) {
  226. eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
  227. if (elapsed > eta)
  228. return 0;
  229. else
  230. return eta - elapsed;
  231. }
  232. assert(iter != pings.end());
  233. eta += etaNext;
  234. prevPing = &*iter;
  235. }
  236. }
  237. size_t Congestion::getBandwidth()
  238. {
  239. size_t bandwidth;
  240. // No measurements yet? Guess RTT of 60 ms
  241. if (safeBaseRTT == (unsigned)-1)
  242. bandwidth = congWindow * 1000 / 60;
  243. else
  244. bandwidth = congWindow * 1000 / safeBaseRTT;
  245. // We're still probing so guess actual bandwidth is halfway between
  246. // the current guess and the next one (slow start doubles each time)
  247. if (inSlowStart)
  248. bandwidth = bandwidth + bandwidth / 2;
  249. return bandwidth;
  250. }
  251. void Congestion::debugTrace(const char* filename, int fd)
  252. {
  253. #ifdef CONGESTION_TRACE
  254. #ifdef __linux__
  255. FILE *f;
  256. f = fopen(filename, "ab");
  257. if (f != NULL) {
  258. struct tcp_info info;
  259. int buffered;
  260. socklen_t len;
  261. len = sizeof(info);
  262. if ((getsockopt(fd, IPPROTO_TCP,
  263. TCP_INFO, &info, &len) == 0) &&
  264. (ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
  265. struct timeval now;
  266. gettimeofday(&now, NULL);
  267. fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
  268. (unsigned)now.tv_sec, (unsigned)now.tv_usec,
  269. congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
  270. getInFlight(), buffered);
  271. }
  272. fclose(f);
  273. }
  274. #endif
  275. #endif
  276. }
  277. unsigned Congestion::getExtraBuffer()
  278. {
  279. unsigned elapsed;
  280. unsigned consumed;
  281. if (baseRTT == (unsigned)-1)
  282. return 0;
  283. elapsed = msSince(&lastUpdate);
  284. consumed = elapsed * congWindow / baseRTT;
  285. if (consumed >= extraBuffer)
  286. return 0;
  287. else
  288. return extraBuffer - consumed;
  289. }
  290. unsigned Congestion::getInFlight()
  291. {
  292. struct RTTInfo nextPong;
  293. unsigned etaNext, delay, elapsed, acked;
  294. // Simple case?
  295. if (lastPosition == lastPong.pos)
  296. return 0;
  297. // No measurements yet?
  298. if (baseRTT == (unsigned)-1) {
  299. if (!pings.empty())
  300. return lastPosition - pings.front().pos;
  301. return 0;
  302. }
  303. // If we aren't waiting for any pong then we have to estimate things
  304. // by pretending that we had a ping just after the last position
  305. // update.
  306. if (pings.empty()) {
  307. nextPong.tv = lastUpdate;
  308. nextPong.pos = lastPosition;
  309. nextPong.extra = extraBuffer;
  310. } else {
  311. nextPong = pings.front();
  312. }
  313. // First we need to estimate how many bytes have made it through
  314. // completely. Look at the next ping that should arrive and figure
  315. // out how far behind it should be and interpolate the positions.
  316. etaNext = msBetween(&lastPong.tv, &nextPong.tv);
  317. // Compensate for buffering delays
  318. delay = nextPong.extra * baseRTT / congWindow;
  319. etaNext += delay;
  320. delay = lastPong.extra * baseRTT / congWindow;
  321. if (delay >= etaNext)
  322. etaNext = 0;
  323. else
  324. etaNext -= delay;
  325. elapsed = msSince(&lastPongArrival);
  326. // The pong should be here any second. Be optimistic and assume
  327. // we can already use its value.
  328. if (etaNext <= elapsed)
  329. acked = nextPong.pos;
  330. else {
  331. acked = lastPong.pos;
  332. acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
  333. }
  334. return lastPosition - acked;
  335. }
  336. void Congestion::updateCongestion()
  337. {
  338. unsigned diff;
  339. // We want at least three measurements to avoid noise
  340. if (measurements < 3)
  341. return;
  342. assert(minRTT >= baseRTT);
  343. assert(minCongestedRTT >= baseRTT);
  344. // The goal is to have a slightly too large congestion window since
  345. // a "perfect" one cannot be distinguished from a too small one. This
  346. // translates to a goal of a few extra milliseconds of delay.
  347. diff = minRTT - baseRTT;
  348. if (diff > __rfbmax(100, baseRTT/2)) {
  349. // We have no way of detecting loss, so assume massive latency
  350. // spike means packet loss. Adjust the window and go directly
  351. // to congestion avoidance.
  352. #ifdef CONGESTION_DEBUG
  353. vlog.debug("Latency spike! Backing off...");
  354. #endif
  355. congWindow = congWindow * baseRTT / minRTT;
  356. inSlowStart = false;
  357. }
  358. if (inSlowStart) {
  359. // Slow start. Aggressive growth until we see congestion.
  360. if (diff > 25) {
  361. // If we see an increased latency then we assume we've hit the
  362. // limit and it's time to leave slow start and switch to
  363. // congestion avoidance
  364. congWindow = congWindow * baseRTT / minRTT;
  365. inSlowStart = false;
  366. } else {
  367. // It's not safe to increase unless we actually used the entire
  368. // congestion window, hence we look at minCongestedRTT and not
  369. // minRTT
  370. diff = minCongestedRTT - baseRTT;
  371. if (diff < 25)
  372. congWindow *= 2;
  373. }
  374. } else {
  375. // Congestion avoidance (VEGAS)
  376. if (diff > 50) {
  377. // Slightly too fast
  378. congWindow -= 4096;
  379. } else {
  380. // Only the "congested" pongs are checked to see if the
  381. // window is too small.
  382. diff = minCongestedRTT - baseRTT;
  383. if (diff < 5) {
  384. // Way too slow
  385. congWindow += 8192;
  386. } else if (diff < 25) {
  387. // Too slow
  388. congWindow += 4096;
  389. }
  390. }
  391. }
  392. if (congWindow < MINIMUM_WINDOW)
  393. congWindow = MINIMUM_WINDOW;
  394. if (congWindow > MAXIMUM_WINDOW)
  395. congWindow = MAXIMUM_WINDOW;
  396. #ifdef CONGESTION_DEBUG
  397. vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
  398. minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
  399. congWindow * 8.0 / baseRTT / 1000.0,
  400. inSlowStart ? " (slow start)" : "");
  401. #endif
  402. measurements = 0;
  403. gettimeofday(&lastAdjustment, NULL);
  404. minRTT = minCongestedRTT = -1;
  405. }