1 files changed, 650 insertions, 0 deletions
diff --git a/common/core/string.cxx b/common/core/string.cxx
new file mode 100644
index 00000000..091836db
--- /dev/null
+++ b/common/core/string.cxx
@@ -0,0 +1,650 @@
+/* Copyright (C) 2002-2005 RealVNC Ltd.  All Rights Reserved.
+ * Copyright 2011-2023 Pierre Ossman for Cendio AB
+ * 
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <core/string.h>
+
+namespace core {
+
+  std::string format(const char *fmt, ...)
+  {
+    va_list ap;
+    int len;
+    char *buf;
+    std::string out;
+
+    va_start(ap, fmt);
+    len = vsnprintf(nullptr, 0, fmt, ap);
+    va_end(ap);
+
+    if (len < 0)
+      return "";
+
+    buf = new char[len+1];
+
+    va_start(ap, fmt);
+    vsnprintf(buf, len+1, fmt, ap);
+    va_end(ap);
+
+    out = buf;
+
+    delete [] buf;
+
+    return out;
+  }
+
+  std::vector<std::string> split(const char* src,
+                                 const char delimiter)
+  {
+    std::vector<std::string> out;
+    const char *start, *stop;
+
+    if (src[0] == '\0')
+      return out;
+
+    start = src;
+    do {
+      stop = strchr(start, delimiter);
+      if (stop == nullptr) {
+        out.push_back(start);
+      } else {
+        out.push_back(std::string(start, stop-start));
+        start = stop + 1;
+      }
+    } while (stop != nullptr);
+
+    return out;
+  }
+
+  static char intToHex(uint8_t i) {
+    if (i<=9)
+      return '0'+i;
+    else if ((i>=10) && (i<=15))
+      return 'a'+(i-10);
+    assert(false);
+    return '\0';
+  }
+
+  void binToHex(const uint8_t* in, size_t inlen,
+                char* out, size_t outlen) {
+    if (inlen > outlen/2)
+      inlen = outlen/2;
+
+    if (inlen > 0) {
+      assert(in);
+      assert(out);
+    }
+
+    for (size_t i=0; i<inlen; i++) {
+      out[i*2] = intToHex((in[i] >> 4) & 15);
+      out[i*2+1] = intToHex((in[i] & 15));
+    }
+  }
+
+  std::string binToHex(const uint8_t* in, size_t inlen) {
+    char* buffer = new char[inlen*2+1]();
+    std::string out;
+    binToHex(in, inlen, buffer, inlen*2);
+    out = buffer;
+    delete [] buffer;
+    return out;
+  }
+
+  static bool readHexAndShift(char c, uint8_t* v) {
+    c=tolower(c);
+    if ((c >= '0') && (c <= '9'))
+      *v = (*v << 4) + (c - '0');
+    else if ((c >= 'a') && (c <= 'f'))
+      *v = (*v << 4) + (c - 'a' + 10);
+    else
+      return false;
+    return true;
+  }
+
+  bool hexToBin(const char* in, size_t inlen,
+                uint8_t* out, size_t outlen) {
+    assert(in || inlen == 0);
+    assert(out || outlen == 0);
+
+    if (inlen & 1)
+      return false;
+
+    if (inlen > outlen*2)
+      inlen = outlen*2;
+
+    for(size_t i=0; i<inlen; i+=2) {
+      uint8_t byte = 0;
+      if (!readHexAndShift(in[i], &byte) ||
+          !readHexAndShift(in[i+1], &byte))
+        return false;
+      out[i/2] = byte;
+    }
+
+    return true;
+  }
+
+  std::vector<uint8_t> hexToBin(const char* in, size_t inlen) {
+    std::vector<uint8_t> out(inlen/2);
+    if (!hexToBin(in, inlen, out.data(), inlen/2))
+      return std::vector<uint8_t>();
+    return out;
+  }
+
+  std::string convertLF(const char* src, size_t bytes)
+  {
+    size_t sz;
+    std::string out;
+
+    const char* in;
+    size_t in_len;
+
+    // Compute output size
+    sz = 0;
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      if (*in != '\r') {
+        sz++;
+        in++;
+        in_len--;
+        continue;
+      }
+
+      if ((in_len < 2) || (*(in+1) != '\n'))
+        sz++;
+
+      in++;
+      in_len--;
+    }
+
+    // Reserve space
+    out.reserve(sz);
+
+    // And convert
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      if (*in != '\r') {
+        out += *in++;
+        in_len--;
+        continue;
+      }
+
+      if ((in_len < 2) || (*(in+1) != '\n'))
+        out += '\n';
+
+      in++;
+      in_len--;
+    }
+
+    return out;
+  }
+
+  std::string convertCRLF(const char* src, size_t bytes)
+  {
+    std::string out;
+    size_t sz;
+
+    const char* in;
+    size_t in_len;
+
+    // Compute output size
+    sz = 0;
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      sz++;
+
+      if (*in == '\r') {
+        if ((in_len < 2) || (*(in+1) != '\n'))
+          sz++;
+      } else if (*in == '\n') {
+        if ((in == src) || (*(in-1) != '\r'))
+          sz++;
+      }
+
+      in++;
+      in_len--;
+    }
+
+    // Reserve space
+    out.reserve(sz);
+
+    // And convert
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      if (*in == '\n') {
+        if ((in == src) || (*(in-1) != '\r'))
+          out += '\r';
+      }
+
+      out += *in;
+
+      if (*in == '\r') {
+        if ((in_len < 2) || (*(in+1) != '\n'))
+          out += '\n';
+      }
+
+      in++;
+      in_len--;
+    }
+
+    return out;
+  }
+
+  size_t ucs4ToUTF8(unsigned src, char dst[5]) {
+    if (src < 0x80) {
+      *dst++ = src;
+      *dst++ = '\0';
+      return 1;
+    } else if (src < 0x800) {
+      *dst++ = 0xc0 | (src >> 6);
+      *dst++ = 0x80 | (src & 0x3f);
+      *dst++ = '\0';
+      return 2;
+    } else if ((src >= 0xd800) && (src < 0xe000)) {
+      return ucs4ToUTF8(0xfffd, dst);
+    } else if (src < 0x10000) {
+      *dst++ = 0xe0 | (src >> 12);
+      *dst++ = 0x80 | ((src >> 6) & 0x3f);
+      *dst++ = 0x80 | (src & 0x3f);
+      *dst++ = '\0';
+      return 3;
+    } else if (src < 0x110000) {
+      *dst++ = 0xf0 | (src >> 18);
+      *dst++ = 0x80 | ((src >> 12) & 0x3f);
+      *dst++ = 0x80 | ((src >> 6) & 0x3f);
+      *dst++ = 0x80 | (src & 0x3f);
+      *dst++ = '\0';
+      return 4;
+    } else {
+      return ucs4ToUTF8(0xfffd, dst);
+    }
+  }
+
+  size_t utf8ToUCS4(const char* src, size_t max, unsigned* dst) {
+    size_t count, consumed;
+
+    *dst = 0xfffd;
+
+    if (max == 0)
+      return 0;
+
+    consumed = 1;
+
+    if ((*src & 0x80) == 0) {
+      *dst = *src;
+      count = 0;
+    } else if ((*src & 0xe0) == 0xc0) {
+      *dst = *src & 0x1f;
+      count = 1;
+    } else if ((*src & 0xf0) == 0xe0) {
+      *dst = *src & 0x0f;
+      count = 2;
+    } else if ((*src & 0xf8) == 0xf0) {
+      *dst = *src & 0x07;
+      count = 3;
+    } else {
+      // Invalid sequence, consume all continuation characters
+      src++;
+      max--;
+      while ((max-- > 0) && ((*src++ & 0xc0) == 0x80))
+        consumed++;
+      return consumed;
+    }
+
+    src++;
+    max--;
+
+    while (count--) {
+      consumed++;
+
+      // Invalid or truncated sequence?
+      if ((max == 0) || ((*src & 0xc0) != 0x80)) {
+        *dst = 0xfffd;
+        return consumed;
+      }
+
+      *dst <<= 6;
+      *dst |= *src & 0x3f;
+
+      src++;
+      max--;
+    }
+
+    // UTF-16 surrogate code point?
+    if ((*dst >= 0xd800) && (*dst < 0xe000))
+      *dst = 0xfffd;
+
+    return consumed;
+  }
+
+  size_t ucs4ToUTF16(unsigned src, wchar_t dst[3]) {
+    if ((src < 0xd800) || ((src >= 0xe000) && (src < 0x10000))) {
+      *dst++ = src;
+      *dst++ = L'\0';
+      return 1;
+    } else if ((src >= 0x10000) && (src < 0x110000)) {
+      src -= 0x10000;
+      *dst++ = 0xd800 | ((src >> 10) & 0x03ff);
+      *dst++ = 0xdc00 | (src & 0x03ff);
+      *dst++ = L'\0';
+      return 2;
+    } else {
+      return ucs4ToUTF16(0xfffd, dst);
+    }
+  }
+
+  size_t utf16ToUCS4(const wchar_t* src, size_t max, unsigned* dst) {
+    *dst = 0xfffd;
+
+    if (max == 0)
+      return 0;
+
+    if ((*src < 0xd800) || (*src >= 0xe000)) {
+      *dst = *src;
+      return 1;
+    }
+
+    if (*src & 0x0400) {
+      size_t consumed;
+
+      // Invalid sequence, consume all continuation characters
+      consumed = 0;
+      while ((max > 0) && (*src & 0x0400)) {
+        src++;
+        max--;
+        consumed++;
+      }
+
+      return consumed;
+    }
+
+    *dst = *src++;
+    max--;
+
+    // Invalid or truncated sequence?
+    if ((max == 0) || ((*src & 0xfc00) != 0xdc00)) {
+      *dst = 0xfffd;
+      return 1;
+    }
+
+    *dst = 0x10000 + ((*dst & 0x03ff) << 10);
+    *dst |= *src & 0x3ff;
+
+    return 2;
+  }
+
+  std::string latin1ToUTF8(const char* src, size_t bytes) {
+    std::string out;
+    size_t sz;
+
+    const char* in;
+    size_t in_len;
+
+    // Compute output size
+    sz = 0;
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      char buf[5];
+      sz += ucs4ToUTF8(*(const unsigned char*)in, buf);
+      in++;
+      in_len--;
+    }
+
+    // Reserve space
+    out.reserve(sz);
+
+    // And convert
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      char buf[5];
+      ucs4ToUTF8(*(const unsigned char*)in, buf);
+      out += buf;
+      in++;
+      in_len--;
+    }
+
+    return out;
+  }
+
+  std::string utf8ToLatin1(const char* src, size_t bytes) {
+    std::string out;
+    size_t sz;
+
+    const char* in;
+    size_t in_len;
+
+    // Compute output size
+    sz = 0;
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+
+      len = utf8ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+      sz++;
+    }
+
+    // Reserve space
+    out.reserve(sz);
+
+    // And convert
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+
+      len = utf8ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+
+      if (ucs > 0xff)
+        out += '?';
+      else
+        out += (unsigned char)ucs;
+    }
+
+    return out;
+  }
+
+  std::string utf16ToUTF8(const wchar_t* src, size_t units)
+  {
+    std::string out;
+    size_t sz;
+
+    const wchar_t* in;
+    size_t in_len;
+
+    // Compute output size
+    sz = 0;
+    in = src;
+    in_len = units;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+      char buf[5];
+
+      len = utf16ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+
+      sz += ucs4ToUTF8(ucs, buf);
+    }
+
+    // Reserve space
+    out.reserve(sz);
+
+    // And convert
+    in = src;
+    in_len = units;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+      char buf[5];
+
+      len = utf16ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+
+      ucs4ToUTF8(ucs, buf);
+      out += buf;
+    }
+
+    return out;
+  }
+
+  std::wstring utf8ToUTF16(const char* src, size_t bytes)
+  {
+    std::wstring out;
+    size_t sz;
+
+    const char* in;
+    size_t in_len;
+
+    // Compute output size
+    sz = 0;
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+      wchar_t buf[3];
+
+      len = utf8ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+
+      sz += ucs4ToUTF16(ucs, buf);
+    }
+
+    // Reserve space
+    out.reserve(sz);
+
+    // And convert
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+      wchar_t buf[3];
+
+      len = utf8ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+
+      ucs4ToUTF16(ucs, buf);
+      out += buf;
+    }
+
+    return out;
+  }
+
+  bool isValidUTF8(const char* str, size_t bytes)
+  {
+    while ((bytes > 0) && (*str != '\0')) {
+      size_t len;
+      unsigned ucs;
+
+      len = utf8ToUCS4(str, bytes, &ucs);
+      str += len;
+      bytes -= len;
+
+      if (ucs == 0xfffd)
+        return false;
+    }
+
+    return true;
+  }
+
+  bool isValidUTF16(const wchar_t* wstr, size_t units)
+  {
+    while ((units > 0) && (*wstr != '\0')) {
+      size_t len;
+      unsigned ucs;
+
+      len = utf16ToUCS4(wstr, units, &ucs);
+      wstr += len;
+      units -= len;
+
+      if (ucs == 0xfffd)
+        return false;
+    }
+
+    return true;
+  }
+
+  static std::string doPrefix(long long value, const char *unit,
+                              unsigned divisor, const char **prefixes,
+                              size_t prefixCount, int precision) {
+    char buffer[256];
+    double newValue;
+    size_t prefix;
+
+    newValue = value;
+    prefix = 0;
+    while (newValue >= divisor) {
+      if (prefix >= prefixCount)
+        break;
+      newValue /= divisor;
+      prefix++;
+    }
+
+    snprintf(buffer, sizeof(buffer), "%.*g %s%s", precision, newValue,
+             (prefix == 0) ? "" : prefixes[prefix-1], unit);
+    buffer[sizeof(buffer)-1] = '\0';
+
+    return buffer;
+  }
+
+  static const char *siPrefixes[] =
+    { "k", "M", "G", "T", "P", "E", "Z", "Y" };
+  static const char *iecPrefixes[] =
+    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+
+  std::string siPrefix(long long value, const char *unit,
+                       int precision) {
+    return doPrefix(value, unit, 1000, siPrefixes,
+                    sizeof(siPrefixes)/sizeof(*siPrefixes),
+                    precision);
+  }
+
+  std::string iecPrefix(long long value, const char *unit,
+                        int precision) {
+    return doPrefix(value, unit, 1024, iecPrefixes,
+                    sizeof(iecPrefixes)/sizeof(*iecPrefixes),
+                    precision);
+  }
+};