You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Congestion.cxx 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. /* Copyright 2009-2018 Pierre Ossman for Cendio AB
  2. *
  3. * This is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This software is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License
  14. * along with this software; if not, write to the Free Software
  15. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  16. * USA.
  17. */
  18. /*
  19. * This code implements congestion control in the same way as TCP in
  20. * order to avoid excessive latency in the transport. This is needed
  21. * because "buffer bloat" is unfortunately still a very real problem.
  22. *
  23. * The basic principle is TCP Congestion Control (RFC 5681), with the
  24. * addition of using the TCP Vegas algorithm. The reason we use Vegas
  25. * is that we run on top of a reliable transport so we need a latency
  26. * based algorithm rather than a loss based one. There is also a lot of
  27. * interpolation of values. This is because we have rather horrible
  28. * granularity in our measurements.
  29. *
  30. * We use a simplistic form of slow start in order to ramp up quickly
  31. * from an idle state. We do not have any persistent threshold though
  32. * as we have too much noise for it to be reliable.
  33. */
  34. #include <assert.h>
  35. #include <sys/time.h>
  36. #ifdef __linux__
  37. #include <sys/ioctl.h>
  38. #include <sys/socket.h>
  39. #include <netinet/in.h>
  40. #include <netinet/tcp.h>
  41. #include <linux/sockios.h>
  42. #endif
  43. #include <rfb/Congestion.h>
  44. #include <rfb/LogWriter.h>
  45. #include <rfb/util.h>
  46. // Debug output on what the congestion control is up to
  47. #undef CONGESTION_DEBUG
  48. // Dump socket congestion window debug trace to disk
  49. #undef CONGESTION_TRACE
  50. using namespace rfb;
  51. // This window should get us going fairly fast on a decent bandwidth network.
  52. // If it's too high, it will rapidly be reduced and stay low.
  53. static const unsigned INITIAL_WINDOW = 16384;
  54. // TCP's minimal window is 3*MSS. But since we don't know the MSS, we
  55. // make a guess at 4 KiB (it's probably a bit higher).
  56. static const unsigned MINIMUM_WINDOW = 4096;
  57. // The current default maximum window for Linux (4 MiB). Should be a good
  58. // limit for now...
  59. static const unsigned MAXIMUM_WINDOW = 4194304;
// Log channel named "Congestion"; in this file it is only written to
// from the CONGESTION_DEBUG sections
static LogWriter vlog("Congestion");
  61. Congestion::Congestion() :
  62. lastPosition(0), extraBuffer(0),
  63. baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
  64. safeBaseRTT(-1), measurements(0), minRTT(-1), minCongestedRTT(-1)
  65. {
  66. gettimeofday(&lastUpdate, NULL);
  67. gettimeofday(&lastSent, NULL);
  68. memset(&lastPong, 0, sizeof(lastPong));
  69. gettimeofday(&lastPongArrival, NULL);
  70. gettimeofday(&lastAdjustment, NULL);
  71. }
  72. Congestion::~Congestion()
  73. {
  74. }
  75. void Congestion::updatePosition(unsigned pos)
  76. {
  77. struct timeval now;
  78. unsigned delta, consumed;
  79. gettimeofday(&now, NULL);
  80. delta = pos - lastPosition;
  81. if ((delta > 0) || (extraBuffer > 0))
  82. lastSent = now;
  83. // Idle for too long?
  84. // We use a very crude RTO calculation in order to keep things simple
  85. // FIXME: should implement RFC 2861
  86. if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
  87. #ifdef CONGESTION_DEBUG
  88. vlog.debug("Connection idle for %d ms, resetting congestion control",
  89. msBetween(&lastSent, &now));
  90. #endif
  91. // Close congestion window and redo wire latency measurement
  92. congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
  93. baseRTT = -1;
  94. measurements = 0;
  95. gettimeofday(&lastAdjustment, NULL);
  96. minRTT = minCongestedRTT = -1;
  97. inSlowStart = true;
  98. }
  99. // Commonly we will be in a state of overbuffering. We need to
  100. // estimate the extra delay that causes so we can separate it from
  101. // the delay caused by an incorrect congestion window.
  102. // (we cannot do this until we have a RTT measurement though)
  103. if (baseRTT != (unsigned)-1) {
  104. extraBuffer += delta;
  105. consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
  106. if (extraBuffer < consumed)
  107. extraBuffer = 0;
  108. else
  109. extraBuffer -= consumed;
  110. }
  111. lastPosition = pos;
  112. lastUpdate = now;
  113. }
  114. void Congestion::sentPing()
  115. {
  116. struct RTTInfo rttInfo;
  117. memset(&rttInfo, 0, sizeof(struct RTTInfo));
  118. gettimeofday(&rttInfo.tv, NULL);
  119. rttInfo.pos = lastPosition;
  120. rttInfo.extra = getExtraBuffer();
  121. rttInfo.congested = isCongested();
  122. pings.push_back(rttInfo);
  123. }
  124. void Congestion::gotPong()
  125. {
  126. struct timeval now;
  127. struct RTTInfo rttInfo;
  128. unsigned rtt, delay;
  129. if (pings.empty())
  130. return;
  131. gettimeofday(&now, NULL);
  132. rttInfo = pings.front();
  133. pings.pop_front();
  134. lastPong = rttInfo;
  135. lastPongArrival = now;
  136. rtt = msBetween(&rttInfo.tv, &now);
  137. if (rtt < 1)
  138. rtt = 1;
  139. // Try to estimate wire latency by tracking lowest seen latency
  140. if (rtt < baseRTT)
  141. safeBaseRTT = baseRTT = rtt;
  142. // Pings sent before the last adjustment aren't interesting as they
  143. // aren't a measurement of the current congestion window
  144. if (isBefore(&rttInfo.tv, &lastAdjustment))
  145. return;
  146. // Estimate added delay because of overtaxed buffers (see above)
  147. delay = rttInfo.extra * baseRTT / congWindow;
  148. if (delay < rtt)
  149. rtt -= delay;
  150. else
  151. rtt = 1;
  152. // A latency less than the wire latency means that we've
  153. // understimated the congestion window. We can't really determine
  154. // how much, so pretend that we got no buffer latency at all.
  155. if (rtt < baseRTT)
  156. rtt = baseRTT;
  157. // Record the minimum seen delay (hopefully ignores jitter) and let
  158. // the congestion control do its thing.
  159. //
  160. // Note: We are delay based rather than loss based, which means we
  161. // need to look at pongs even if they weren't limited by the
  162. // current window ("congested"). Otherwise we will fail to
  163. // detect increasing congestion until the application exceeds
  164. // the congestion window.
  165. if (rtt < minRTT)
  166. minRTT = rtt;
  167. if (rttInfo.congested) {
  168. if (rtt < minCongestedRTT)
  169. minCongestedRTT = rtt;
  170. }
  171. measurements++;
  172. updateCongestion();
  173. }
  174. bool Congestion::isCongested()
  175. {
  176. if (getInFlight() < congWindow)
  177. return false;
  178. return true;
  179. }
  180. int Congestion::getUncongestedETA()
  181. {
  182. unsigned targetAcked;
  183. const struct RTTInfo* prevPing;
  184. unsigned eta, elapsed;
  185. unsigned etaNext, delay;
  186. std::list<struct RTTInfo>::const_iterator iter;
  187. targetAcked = lastPosition - congWindow;
  188. // Simple case?
  189. if (lastPong.pos > targetAcked)
  190. return 0;
  191. // No measurements yet?
  192. if (baseRTT == (unsigned)-1)
  193. return -1;
  194. prevPing = &lastPong;
  195. eta = 0;
  196. elapsed = msSince(&lastPongArrival);
  197. // Walk the ping queue and figure out which one we are waiting for to
  198. // get to an uncongested state
  199. for (iter = pings.begin(); ;++iter) {
  200. struct RTTInfo curPing;
  201. // If we aren't waiting for a pong that will clear the congested
  202. // state then we have to estimate the final bit by pretending that
  203. // we had a ping just after the last position update.
  204. if (iter == pings.end()) {
  205. curPing.tv = lastUpdate;
  206. curPing.pos = lastPosition;
  207. curPing.extra = extraBuffer;
  208. } else {
  209. curPing = *iter;
  210. }
  211. etaNext = msBetween(&prevPing->tv, &curPing.tv);
  212. // Compensate for buffering delays
  213. delay = curPing.extra * baseRTT / congWindow;
  214. etaNext += delay;
  215. delay = prevPing->extra * baseRTT / congWindow;
  216. if (delay >= etaNext)
  217. etaNext = 0;
  218. else
  219. etaNext -= delay;
  220. // Found it?
  221. if (curPing.pos > targetAcked) {
  222. eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
  223. if (elapsed > eta)
  224. return 0;
  225. else
  226. return eta - elapsed;
  227. }
  228. assert(iter != pings.end());
  229. eta += etaNext;
  230. prevPing = &*iter;
  231. }
  232. }
  233. size_t Congestion::getBandwidth()
  234. {
  235. // No measurements yet? Guess RTT of 60 ms
  236. if (safeBaseRTT == (unsigned)-1)
  237. return congWindow * 1000 / 60;
  238. return congWindow * 1000 / safeBaseRTT;
  239. }
  240. void Congestion::debugTrace(const char* filename, int fd)
  241. {
  242. #ifdef CONGESTION_TRACE
  243. #ifdef __linux__
  244. FILE *f;
  245. f = fopen(filename, "ab");
  246. if (f != NULL) {
  247. struct tcp_info info;
  248. int buffered;
  249. socklen_t len;
  250. len = sizeof(info);
  251. if ((getsockopt(fd, IPPROTO_TCP,
  252. TCP_INFO, &info, &len) == 0) &&
  253. (ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
  254. struct timeval now;
  255. gettimeofday(&now, NULL);
  256. fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
  257. (unsigned)now.tv_sec, (unsigned)now.tv_usec,
  258. congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
  259. getInFlight(), buffered);
  260. }
  261. fclose(f);
  262. }
  263. #endif
  264. #endif
  265. }
  266. unsigned Congestion::getExtraBuffer()
  267. {
  268. unsigned elapsed;
  269. unsigned consumed;
  270. if (baseRTT == (unsigned)-1)
  271. return 0;
  272. elapsed = msSince(&lastUpdate);
  273. consumed = elapsed * congWindow / baseRTT;
  274. if (consumed >= extraBuffer)
  275. return 0;
  276. else
  277. return extraBuffer - consumed;
  278. }
  279. unsigned Congestion::getInFlight()
  280. {
  281. struct RTTInfo nextPong;
  282. unsigned etaNext, delay, elapsed, acked;
  283. // Simple case?
  284. if (lastPosition == lastPong.pos)
  285. return 0;
  286. // No measurements yet?
  287. if (baseRTT == (unsigned)-1) {
  288. if (!pings.empty())
  289. return lastPosition - pings.front().pos;
  290. return 0;
  291. }
  292. // If we aren't waiting for any pong then we have to estimate things
  293. // by pretending that we had a ping just after the last position
  294. // update.
  295. if (pings.empty()) {
  296. nextPong.tv = lastUpdate;
  297. nextPong.pos = lastPosition;
  298. nextPong.extra = extraBuffer;
  299. } else {
  300. nextPong = pings.front();
  301. }
  302. // First we need to estimate how many bytes have made it through
  303. // completely. Look at the next ping that should arrive and figure
  304. // out how far behind it should be and interpolate the positions.
  305. etaNext = msBetween(&lastPong.tv, &nextPong.tv);
  306. // Compensate for buffering delays
  307. delay = nextPong.extra * baseRTT / congWindow;
  308. etaNext += delay;
  309. delay = lastPong.extra * baseRTT / congWindow;
  310. if (delay >= etaNext)
  311. etaNext = 0;
  312. else
  313. etaNext -= delay;
  314. elapsed = msSince(&lastPongArrival);
  315. // The pong should be here any second. Be optimistic and assume
  316. // we can already use its value.
  317. if (etaNext <= elapsed)
  318. acked = nextPong.pos;
  319. else {
  320. acked = lastPong.pos;
  321. acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
  322. }
  323. return lastPosition - acked;
  324. }
  325. void Congestion::updateCongestion()
  326. {
  327. unsigned diff;
  328. // We want at least three measurements to avoid noise
  329. if (measurements < 3)
  330. return;
  331. assert(minRTT >= baseRTT);
  332. assert(minCongestedRTT >= baseRTT);
  333. // The goal is to have a slightly too large congestion window since
  334. // a "perfect" one cannot be distinguished from a too small one. This
  335. // translates to a goal of a few extra milliseconds of delay.
  336. diff = minRTT - baseRTT;
  337. if (diff > __rfbmax(100, baseRTT/2)) {
  338. // We have no way of detecting loss, so assume massive latency
  339. // spike means packet loss. Adjust the window and go directly
  340. // to congestion avoidance.
  341. #ifdef CONGESTION_DEBUG
  342. vlog.debug("Latency spike! Backing off...");
  343. #endif
  344. congWindow = congWindow * baseRTT / minRTT;
  345. inSlowStart = false;
  346. }
  347. if (inSlowStart) {
  348. // Slow start. Aggressive growth until we see congestion.
  349. if (diff > 25) {
  350. // If we see an increased latency then we assume we've hit the
  351. // limit and it's time to leave slow start and switch to
  352. // congestion avoidance
  353. congWindow = congWindow * baseRTT / minRTT;
  354. inSlowStart = false;
  355. } else {
  356. // It's not safe to increase unless we actually used the entire
  357. // congestion window, hence we look at minCongestedRTT and not
  358. // minRTT
  359. diff = minCongestedRTT - baseRTT;
  360. if (diff < 25)
  361. congWindow *= 2;
  362. }
  363. } else {
  364. // Congestion avoidance (VEGAS)
  365. if (diff > 50) {
  366. // Slightly too fast
  367. congWindow -= 4096;
  368. } else {
  369. // Only the "congested" pongs are checked to see if the
  370. // window is too small.
  371. diff = minCongestedRTT - baseRTT;
  372. if (diff < 5) {
  373. // Way too slow
  374. congWindow += 8192;
  375. } else if (diff < 25) {
  376. // Too slow
  377. congWindow += 4096;
  378. }
  379. }
  380. }
  381. if (congWindow < MINIMUM_WINDOW)
  382. congWindow = MINIMUM_WINDOW;
  383. if (congWindow > MAXIMUM_WINDOW)
  384. congWindow = MAXIMUM_WINDOW;
  385. #ifdef CONGESTION_DEBUG
  386. vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
  387. minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
  388. congWindow * 8.0 / baseRTT / 1000.0,
  389. inSlowStart ? " (slow start)" : "");
  390. #endif
  391. measurements = 0;
  392. gettimeofday(&lastAdjustment, NULL);
  393. minRTT = minCongestedRTT = -1;
  394. }