It's a general function and it's better to have that particular complexity in its own place.

8 years ago · c09e5580d1
--- a/common/rfb/CMakeLists.txt
+++ b/common/rfb/CMakeLists.txt
@@ -2,6 +2,7 @@ include_directories(${CMAKE_SOURCE_DIR}/common ${JPEG_INCLUDE_DIR})

 set(RFB_SOURCES
  Blacklist.cxx
  Congestion.cxx
  CConnection.cxx
  CMsgHandler.cxx
  CMsgReader.cxx
--- a/common/rfb/Congestion.cxx
+++ b/common/rfb/Congestion.cxx
@@ -0,0 +1,222 @@
 /* Copyright 2009-2015 Pierre Ossman for Cendio AB
 * 
 * This is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this software; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 */

 /*
 * This code implements congestion control in the same way as TCP in
 * order to avoid excessive latency in the transport. This is needed
 * because "buffer bloat" is unfortunately still a very real problem.
 *
 * The basic principle is TCP Congestion Control (RFC 5618), with the
 * addition of using the TCP Vegas algorithm. The reason we use Vegas
 * is that we run on top of a reliable transport so we need a latency
 * based algorithm rather than a loss based one.
 */

 #include <sys/time.h>

 #include <rfb/Congestion.h>
 #include <rfb/util.h>

 // Debug output on what the congestion control is up to
 #undef CONGESTION_DEBUG

 using namespace rfb;

 // This window should get us going fairly fast on a decent bandwidth network.
 // If it's too high, it will rapidly be reduced and stay low.
 static const unsigned INITIAL_WINDOW = 16384;

 // TCP's minimal window is 3*MSS. But since we don't know the MSS, we
 // make a guess at 4 KiB (it's probably a bit higher).
 static const unsigned MINIMUM_WINDOW = 4096;

 // The current default maximum window for Linux (4 MiB). Should be a good
 // limit for now...
 static const unsigned MAXIMUM_WINDOW = 4194304;

 struct Congestion::RTTInfo {
  struct timeval tv;
  int offset;
  unsigned inFlight;
 };

 Congestion::Congestion() :
    baseRTT(-1), congWindow(INITIAL_WINDOW),
    ackedOffset(0), sentOffset(0),
    minRTT(-1), seenCongestion(false),
    congestionTimer(this)
 {
 }

 Congestion::~Congestion()
 {
 }


 void Congestion::sentPing(int offset)
 {
  struct RTTInfo rttInfo;

  if (ackedOffset == 0)
    ackedOffset = offset;

  memset(&rttInfo, 0, sizeof(struct RTTInfo));

  gettimeofday(&rttInfo.tv, NULL);
  rttInfo.offset = offset;
  rttInfo.inFlight = rttInfo.offset - ackedOffset;

  pings.push_back(rttInfo);

  sentOffset = offset;

  // Let some data flow before we adjust the settings
  if (!congestionTimer.isStarted())
    congestionTimer.start(__rfbmin(baseRTT * 2, 100));
 }

 void Congestion::gotPong()
 {
  struct RTTInfo rttInfo;
  unsigned rtt, delay;

  if (pings.empty())
    return;

  rttInfo = pings.front();
  pings.pop_front();

  rtt = msSince(&rttInfo.tv);
  if (rtt < 1)
    rtt = 1;

  ackedOffset = rttInfo.offset;

  // Try to estimate wire latency by tracking lowest seen latency
  if (rtt < baseRTT)
    baseRTT = rtt;

  if (rttInfo.inFlight > congWindow) {
    seenCongestion = true;

    // Estimate added delay because of overtaxed buffers
    delay = (rttInfo.inFlight - congWindow) * baseRTT / congWindow;

    if (delay < rtt)
      rtt -= delay;
    else
      rtt = 1;

    // If we underestimate the congestion window, then we'll get a latency
    // that's less than the wire latency, which will confuse other portions
    // of the code.
    if (rtt < baseRTT)
      rtt = baseRTT;
  }

  // We only keep track of the minimum latency seen (for a given interval)
  // on the basis that we want to avoid continuous buffer issue, but don't
  // mind (or even approve of) bursts.
  if (rtt < minRTT)
    minRTT = rtt;
 }

 bool Congestion::isCongested(int offset, unsigned idleTime)
 {
  // Idle for too long? (and no data on the wire)
  //
  // FIXME: This should really just be one baseRTT, but we're getting
  //        problems with triggering the idle timeout on each update.
  //        Maybe we need to use a moving average for the wire latency
  //        instead of baseRTT.
  if ((sentOffset == ackedOffset) && (idleTime > 2 * baseRTT)) {

 #ifdef CONGESTION_DEBUG
    if (congWindow > INITIAL_WINDOW)
      fprintf(stderr, "Reverting to initial window (%d KiB) after %d ms\n",
              INITIAL_WINDOW / 1024, sock->outStream().getIdleTime());
 #endif

    // Close congestion window and allow a transfer
    // FIXME: Reset baseRTT like Linux Vegas?
    congWindow = __rfbmin(INITIAL_WINDOW, congWindow);

    return false;
  }

  // FIXME: Should we compensate for non-update data?
  //        (i.e. use sentOffset instead of offset)
  if ((offset - ackedOffset) < congWindow)
    return false;

  // If we just have one outstanding "ping", that means the client has
  // started receiving our update. In order to not regress compared to
  // before we had congestion avoidance, we allow another update here.
  // This could further clog up the tubes, but congestion control isn't
  // really working properly right now anyway as the wire would otherwise
  // be idle for at least RTT/2.
  if (pings.size() == 1)
    return false;

  return true;
 }

 bool Congestion::handleTimeout(Timer* t)
 {
  unsigned diff;

  if (!seenCongestion)
    return false;

  // The goal is to have a slightly too large congestion window since
  // a "perfect" one cannot be distinguished from a too small one. This
  // translates to a goal of a few extra milliseconds of delay.

  diff = minRTT - baseRTT;

  if (diff > __rfbmin(100, baseRTT)) {
    // Way too fast
    congWindow = congWindow * baseRTT / minRTT;
  } else if (diff > __rfbmin(50, baseRTT/2)) {
    // Slightly too fast
    congWindow -= 4096;
  } else if (diff < 5) {
    // Way too slow
    congWindow += 8192;
  } else if (diff < 25) {
    // Too slow
    congWindow += 4096;
  }

  if (congWindow < MINIMUM_WINDOW)
    congWindow = MINIMUM_WINDOW;
  if (congWindow > MAXIMUM_WINDOW)
    congWindow = MAXIMUM_WINDOW;

 #ifdef CONGESTION_DEBUG
  fprintf(stderr, "RTT: %d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps\n",
          minRTT, baseRTT, congWindow / 1024,
          congWindow * 8.0 / baseRTT / 1000.0);
 #endif

  minRTT = -1;
  seenCongestion = false;

  return false;
 }

--- a/common/rfb/Congestion.h
+++ b/common/rfb/Congestion.h
@@ -0,0 +1,62 @@
 /* Copyright 2009-2015 Pierre Ossman for Cendio AB
 * 
 * This is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this software; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 */

 #ifndef __RFB_CONGESTION_H__
 #define __RFB_CONGESTION_H__

 #include <list>

 #include <rfb/Timer.h>

 namespace rfb {
  class Congestion : public Timer::Callback {
  public:
    Congestion();
    ~Congestion();

    // sentPing() must be called when a marker is placed on the
    // outgoing stream, along with the current stream position.
    // gotPong() must be called when the response for such a marker
    // is received.
    void sentPing(int offset);
    void gotPong();

    // isCongested() determines if the transport is currently congested
    // or if more data can be sent. The curren stream position and how
    // long the transport has been idle must be specified.
    bool isCongested(int offset, unsigned idleTime);

  private:
    // Timer callbacks
    virtual bool handleTimeout(Timer* t);

  private:
    unsigned baseRTT;
    unsigned congWindow;
    unsigned ackedOffset, sentOffset;

    unsigned minRTT;
    bool seenCongestion;
    Timer congestionTimer;

    struct RTTInfo;
    std::list<struct RTTInfo> pings;
  };
 }

 #endif
--- a/common/rfb/VNCSConnectionST.cxx
+++ b/common/rfb/VNCSConnectionST.cxx
@@ -17,17 +17,6 @@
 * USA.
 */

 // Debug output on what the congestion control is up to
 #undef CONGESTION_DEBUG

 #include <sys/time.h>

 #ifdef CONGESTION_DEBUG
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #endif

 #include <network/TcpSocket.h>

 #include <rfb/ComparingUpdateTracker.h>
@@ -49,33 +38,12 @@ using namespace rfb;

 static LogWriter vlog("VNCSConnST");

 // This window should get us going fairly fast on a decent bandwidth network.
 // If it's too high, it will rapidly be reduced and stay low.
 static const unsigned INITIAL_WINDOW = 16384;

 // TCP's minimal window is 3*MSS. But since we don't know the MSS, we
 // make a guess at 4 KiB (it's probably a bit higher).
 static const unsigned MINIMUM_WINDOW = 4096;

 // The current default maximum window for Linux (4 MiB). Should be a good
 // limit for now...
 static const unsigned MAXIMUM_WINDOW = 4194304;

 struct RTTInfo {
  struct timeval tv;
  int offset;
  unsigned inFlight;
 };

 VNCSConnectionST::VNCSConnectionST(VNCServerST* server_, network::Socket *s,
                                   bool reverse)
  : sock(s), reverseConnection(reverse),
    queryConnectTimer(this), inProcessMessages(false),
    pendingSyncFence(false), syncFence(false), fenceFlags(0),
    fenceDataLen(0), fenceData(NULL),
    baseRTT(-1), congWindow(0), ackedOffset(0), sentOffset(0),
    minRTT(-1), seenCongestion(false),
    congestionTimer(this),
    server(server_), updates(false),
    updateRenderedCursor(false), removeRenderedCursor(false),
    continuousUpdates(false), encodeManager(this), pointerEventTime(0),
@@ -428,10 +396,6 @@ void VNCSConnectionST::authSuccess()
  // - Mark the entire display as "dirty"
  updates.add_changed(server->pb->getRect());
  startTime = time(0);

  // - Bootstrap the congestion control
  ackedOffset = sock->outStream().length();
  congWindow = INITIAL_WINDOW;
 }

 void VNCSConnectionST::queryConnection(const char* userName)
@@ -697,7 +661,7 @@ void VNCSConnectionST::fence(rdr::U32 flags, unsigned len, const char data[])
    // Initial dummy fence;
    break;
  case 1:
    handleRTTPong();
    congestion.gotPong();
    break;
  default:
    vlog.error("Fence response of unexpected type received");
@@ -759,9 +723,7 @@ void VNCSConnectionST::supportsContinuousUpdates()
 bool VNCSConnectionST::handleTimeout(Timer* t)
 {
  try {
    if (t == &congestionTimer)
      updateCongestion();
    else if (t == &queryConnectTimer) {
    if (t == &queryConnectTimer) {
      if (state() == RFBSTATE_QUERYING)
        approveConnection(false, "The attempt to prompt the user to accept the connection failed");
    }
@@ -775,20 +737,11 @@ bool VNCSConnectionST::handleTimeout(Timer* t)

 void VNCSConnectionST::writeRTTPing()
 {
  struct RTTInfo rttInfo;
  char type;

  if (!cp.supportsFence)
    return;

  memset(&rttInfo, 0, sizeof(struct RTTInfo));

  gettimeofday(&rttInfo.tv, NULL);
  rttInfo.offset = sock->outStream().length();
  rttInfo.inFlight = rttInfo.offset - ackedOffset;

  pings.push_back(rttInfo);

  // We need to make sure any old update are already processed by the
  // time we get the response back. This allows us to reliably throttle
  // back on client overload, as well as network overload.
@@ -796,63 +749,11 @@ void VNCSConnectionST::writeRTTPing()
  writer()->writeFence(fenceFlagRequest | fenceFlagBlockBefore,
                       sizeof(type), &type);

  sentOffset = rttInfo.offset;

  // Let some data flow before we adjust the settings
  if (!congestionTimer.isStarted())
    congestionTimer.start(__rfbmin(baseRTT * 2, 100));
 }

 void VNCSConnectionST::handleRTTPong()
 {
  struct RTTInfo rttInfo;
  unsigned rtt, delay;

  if (pings.empty())
    return;

  rttInfo = pings.front();
  pings.pop_front();

  rtt = msSince(&rttInfo.tv);
  if (rtt < 1)
    rtt = 1;

  ackedOffset = rttInfo.offset;

  // Try to estimate wire latency by tracking lowest seen latency
  if (rtt < baseRTT)
    baseRTT = rtt;

  if (rttInfo.inFlight > congWindow) {
    seenCongestion = true;

    // Estimate added delay because of overtaxed buffers
    delay = (rttInfo.inFlight - congWindow) * baseRTT / congWindow;

    if (delay < rtt)
      rtt -= delay;
    else
      rtt = 1;

    // If we underestimate the congestion window, then we'll get a latency
    // that's less than the wire latency, which will confuse other portions
    // of the code.
    if (rtt < baseRTT)
      rtt = baseRTT;
  }

  // We only keep track of the minimum latency seen (for a given interval)
  // on the basis that we want to avoid continuous buffer issue, but don't
  // mind (or even approve of) bursts.
  if (rtt < minRTT)
    minRTT = rtt;
  congestion.sentPing(sock->outStream().length());
 }

 bool VNCSConnectionST::isCongested()
 {
  int offset;

  // Stuff still waiting in the send buffer?
  sock->outStream().flush();
  if (sock->outStream().bufferUsage() > 0)
@@ -861,98 +762,8 @@ bool VNCSConnectionST::isCongested()
  if (!cp.supportsFence)
    return false;

  // Idle for too long? (and no data on the wire)
  //
  // FIXME: This should really just be one baseRTT, but we're getting
  //        problems with triggering the idle timeout on each update.
  //        Maybe we need to use a moving average for the wire latency
  //        instead of baseRTT.
  if ((sentOffset == ackedOffset) &&
      (sock->outStream().getIdleTime() > 2 * baseRTT)) {

 #ifdef CONGESTION_DEBUG
    if (congWindow > INITIAL_WINDOW)
      fprintf(stderr, "Reverting to initial window (%d KiB) after %d ms\n",
              INITIAL_WINDOW / 1024, sock->outStream().getIdleTime());
 #endif

    // Close congestion window and allow a transfer
    // FIXME: Reset baseRTT like Linux Vegas?
    congWindow = __rfbmin(INITIAL_WINDOW, congWindow);

    return false;
  }

  offset = sock->outStream().length();

  // FIXME: Should we compensate for non-update data?
  //        (i.e. use sentOffset instead of offset)
  if ((offset - ackedOffset) < congWindow)
    return false;

  // If we just have one outstanding "ping", that means the client has
  // started receiving our update. In order to not regress compared to
  // before we had congestion avoidance, we allow another update here.
  // This could further clog up the tubes, but congestion control isn't
  // really working properly right now anyway as the wire would otherwise
  // be idle for at least RTT/2.
  if (pings.size() == 1)
    return false;

  return true;
 }


 void VNCSConnectionST::updateCongestion()
 {
  unsigned diff;

  if (!seenCongestion)
    return;

  diff = minRTT - baseRTT;

  if (diff > __rfbmin(100, baseRTT)) {
    // Way too fast
    congWindow = congWindow * baseRTT / minRTT;
  } else if (diff > __rfbmin(50, baseRTT/2)) {
    // Slightly too fast
    congWindow -= 4096;
  } else if (diff < 5) {
    // Way too slow
    congWindow += 8192;
  } else if (diff < 25) {
    // Too slow
    congWindow += 4096;
  }

  if (congWindow < MINIMUM_WINDOW)
    congWindow = MINIMUM_WINDOW;
  if (congWindow > MAXIMUM_WINDOW)
    congWindow = MAXIMUM_WINDOW;

 #ifdef CONGESTION_DEBUG
  fprintf(stderr, "RTT: %d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps\n",
          minRTT, baseRTT, congWindow / 1024,
          congWindow * 8.0 / baseRTT / 1000.0);

 #ifdef TCP_INFO
  struct tcp_info tcp_info;
  socklen_t tcp_info_length;

  tcp_info_length = sizeof(tcp_info);
  if (getsockopt(sock->getFd(), SOL_TCP, TCP_INFO,
                 (void *)&tcp_info, &tcp_info_length) == 0) {
    fprintf(stderr, "Socket: RTT: %d ms (+/- %d ms) Window %d KiB\n",
            tcp_info.tcpi_rtt / 1000, tcp_info.tcpi_rttvar / 1000,
            tcp_info.tcpi_snd_mss * tcp_info.tcpi_snd_cwnd / 1024);
  }
 #endif

 #endif

  minRTT = -1;
  seenCongestion = false;
  return congestion.isCongested(sock->outStream().length(),
                                sock->outStream().getIdleTime());
 }


--- a/common/rfb/VNCSConnectionST.h
+++ b/common/rfb/VNCSConnectionST.h
@@ -27,15 +27,13 @@
 #ifndef __RFB_VNCSCONNECTIONST_H__
 #define __RFB_VNCSCONNECTIONST_H__

 #include <list>
 #include <set>

 #include <rfb/Congestion.h>
 #include <rfb/EncodeManager.h>
 #include <rfb/SConnection.h>
 #include <rfb/Timer.h>

 struct RTTInfo;

 namespace rfb {
  class VNCServerST;

@@ -161,9 +159,7 @@ namespace rfb {

    // Congestion control
    void writeRTTPing();
    void handleRTTPong();
    bool isCongested();
    void updateCongestion();

    // writeFramebufferUpdate() attempts to write a framebuffer update to the
    // client.
@@ -190,14 +186,7 @@ namespace rfb {
    unsigned fenceDataLen;
    char *fenceData;

    unsigned baseRTT;
    unsigned congWindow;
    unsigned ackedOffset, sentOffset;

    unsigned minRTT;
    bool seenCongestion;
    Timer congestionTimer;
    std::list<struct RTTInfo> pings;
    Congestion congestion;

    VNCServerST* server;
    SimpleUpdateTracker updates;