aboutsummaryrefslogtreecommitdiffstats
path: root/tests/perf
diff options
context:
space:
mode:
authorPierre Ossman <ossman@cendio.se>2019-09-09 12:48:25 +0200
committerPierre Ossman <ossman@cendio.se>2019-09-25 15:50:07 +0200
commit94de4dd0c46483477706002e128a61f74278c0f6 (patch)
tree7e51c88049452f0a46faa54177ffd96dc2903efb /tests/perf
parentd06906db59a6aa6434a5dd602b8d3c987d291ca8 (diff)
downloadtigervnc-94de4dd0c46483477706002e128a61f74278c0f6.tar.gz
tigervnc-94de4dd0c46483477706002e128a61f74278c0f6.zip
Split test programs to benchmarking and unit tests
They have very different purposes, so make things easier to work with by having multiple directories.
Diffstat (limited to 'tests/perf')
-rw-r--r--tests/perf/CMakeLists.txt40
-rw-r--r--tests/perf/convperf.cxx224
-rw-r--r--tests/perf/decperf.cxx240
-rw-r--r--tests/perf/encperf.cxx506
-rw-r--r--tests/perf/fbperf.cxx399
-rw-r--r--tests/perf/results/multicore/README40
-rw-r--r--tests/perf/results/multicore/multicore.odsbin0 -> 42291 bytes
-rw-r--r--tests/perf/results/notrans/README28
-rw-r--r--tests/perf/results/notrans/armhf.csv26
-rw-r--r--tests/perf/results/notrans/i386.csv26
-rw-r--r--tests/perf/results/notrans/x86_64.csv26
-rw-r--r--tests/perf/util.cxx178
-rw-r--r--tests/perf/util.h42
13 files changed, 1775 insertions, 0 deletions
diff --git a/tests/perf/CMakeLists.txt b/tests/perf/CMakeLists.txt
new file mode 100644
index 00000000..053bfaae
--- /dev/null
+++ b/tests/perf/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Header search paths: FLTK and gettext headers, plus the project's
+# common/ directory
+include_directories(${FLTK_INCLUDE_DIR})
+include_directories(${GETTEXT_INCLUDE_DIR})
+
+include_directories(${CMAKE_SOURCE_DIR}/common)
+
+# Helper code from util.cxx, linked into every benchmark below
+add_library(test_util STATIC util.cxx)
+
+# One executable per benchmark, each linked against test_util and rfb
+add_executable(convperf convperf.cxx)
+target_link_libraries(convperf test_util rfb)
+
+add_executable(decperf decperf.cxx)
+target_link_libraries(decperf test_util rfb)
+
+add_executable(encperf encperf.cxx)
+target_link_libraries(encperf test_util rfb)
+
+# fbperf compiles the viewer's pixel buffer and surface sources directly,
+# together with the surface backend matching the target platform
+set(FBPERF_SOURCES
+  fbperf.cxx
+  ${CMAKE_SOURCE_DIR}/vncviewer/PlatformPixelBuffer.cxx
+  ${CMAKE_SOURCE_DIR}/vncviewer/Surface.cxx)
+if(WIN32)
+  list(APPEND FBPERF_SOURCES ${CMAKE_SOURCE_DIR}/vncviewer/Surface_Win32.cxx)
+elseif(APPLE)
+  # Append each file exactly once; expanding ${FBPERF_SOURCES} again in
+  # front of every entry would list the common sources three times
+  list(APPEND FBPERF_SOURCES
+    ${CMAKE_SOURCE_DIR}/vncviewer/Surface_OSX.cxx
+    ${CMAKE_SOURCE_DIR}/vncviewer/keysym2ucs.c
+    ${CMAKE_SOURCE_DIR}/vncviewer/cocoa.mm)
+else()
+  list(APPEND FBPERF_SOURCES ${CMAKE_SOURCE_DIR}/vncviewer/Surface_X11.cxx)
+endif()
+# The frame buffer benchmark also needs the FLTK and gettext libraries
+add_executable(fbperf ${FBPERF_SOURCES})
+target_link_libraries(fbperf test_util rfb ${FLTK_LIBRARIES} ${GETTEXT_LIBRARIES})
+# Extra platform specific link dependencies
+if(WIN32)
+ target_link_libraries(fbperf msimg32)
+endif()
+if(APPLE)
+ target_link_libraries(fbperf "-framework Cocoa")
+ target_link_libraries(fbperf "-framework Carbon")
+ target_link_libraries(fbperf "-framework IOKit")
+endif()
diff --git a/tests/perf/convperf.cxx b/tests/perf/convperf.cxx
new file mode 100644
index 00000000..e4a3fd52
--- /dev/null
+++ b/tests/perf/convperf.cxx
@@ -0,0 +1,224 @@
+/* Copyright 2013-2014 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <rfb/PixelFormat.h>
+
+#include "util.h"
+
+// Side length, in pixels, of the square tile handled by one test call
+static const int tile = 64;
+// Side length, in pixels, of the square frame buffers
+static const int fbsize = 4096;
+
+// Destination (fb1) and source (fb2) buffers; allocated and filled with
+// random data in main()
+static rdr::U8 *fb1, *fb2;
+
+// Signature shared by all benchmarked routines:
+// (destination format, source format, destination data, source data)
+typedef void (*testfn) (rfb::PixelFormat&, rfb::PixelFormat&, rdr::U8*, rdr::U8*);
+
+// A benchmarked routine together with the column label printed for it
+struct TestEntry {
+ const char *label;
+ testfn fn;
+};
+
+/* Baseline measurement: copies a tile one row at a time using memcpy() */
+static void testMemcpy(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf,
+                       rdr::U8 *dst, rdr::U8 *src)
+{
+  // The row length and both strides are derived from dstpf only; srcpf
+  // is not consulted here
+  for (int row = 0; row < tile; row++) {
+    memcpy(dst, src, tile * dstpf.bpp/8);
+    dst += fbsize * dstpf.bpp/8;
+    src += fbsize * dstpf.bpp/8;
+  }
+}
+
+/* Benchmarks rfb::PixelFormat::bufferFromBuffer() on a single tile */
+static void testBuffer(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf,
+ rdr::U8 *dst, rdr::U8 *src)
+{
+ dstpf.bufferFromBuffer(dst, srcpf, src, tile, tile, fbsize, fbsize);
+}
+
+/*
+ * Benchmarks rfb::PixelFormat::rgbFromBuffer() on a single tile. Only
+ * the source format is involved; dstpf is unused.
+ */
+static void testToRGB(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf,
+ rdr::U8 *dst, rdr::U8 *src)
+{
+ srcpf.rgbFromBuffer(dst, src, tile, fbsize, tile);
+}
+
+/*
+ * Benchmarks rfb::PixelFormat::bufferFromRGB() on a single tile. Only
+ * the destination format is involved; srcpf is unused.
+ */
+static void testFromRGB(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf,
+ rdr::U8 *dst, rdr::U8 *src)
+{
+ dstpf.bufferFromRGB(dst, src, tile, fbsize, tile);
+}
+
+/*
+ * Times 10000 calls of fn on randomly placed tiles and prints the
+ * resulting throughput, without any separator or newline.
+ */
+static void doTest(testfn fn, rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf)
+{
+  float pixelCount, elapsed;
+  int round;
+
+  startCpuCounter();
+
+  round = 0;
+  while (round < 10000) {
+    // Pick a position that keeps the whole tile inside the buffers
+    const int x = rand() % (fbsize - tile);
+    const int y = rand() % (fbsize - tile);
+
+    rdr::U8 *dst = fb1 + (x + y * fbsize) * dstpf.bpp/8;
+    rdr::U8 *src = fb2 + (x + y * fbsize) * srcpf.bpp/8;
+
+    fn(dstpf, srcpf, dst, src);
+
+    round++;
+  }
+
+  endCpuCounter();
+
+  // Total number of pixels handled by the loop above
+  pixelCount = (double)tile * tile * 10000;
+  elapsed = getCpuCounter();
+
+  printf("%g", pixelCount / (1000.0*1000.0) / elapsed);
+}
+
+// All benchmarked routines, in the order their columns are printed
+struct TestEntry tests[] = {
+ {"memcpy", testMemcpy},
+ {"bufferFromBuffer", testBuffer},
+ {"rgbFromBuffer", testToRGB},
+ {"bufferFromRGB", testFromRGB},
+};
+
+/*
+ * Prints one CSV row: the source and destination format names followed
+ * by the result of every entry in tests[].
+ */
+static void doTests(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf)
+{
+  const size_t testCount = sizeof(tests)/sizeof(tests[0]);
+  char dstName[256], srcName[256];
+
+  dstpf.print(dstName, sizeof(dstName));
+  srcpf.print(srcName, sizeof(srcName));
+
+  // The source format goes in the first column
+  printf("%s,%s", srcName, dstName);
+
+  for (size_t n = 0; n != testCount; n++) {
+    printf(",");
+    doTest(tests[n].fn, dstpf, srcpf);
+  }
+
+  printf("\n");
+}
+
+/*
+ * Allocates two randomly filled frame buffers and prints a CSV table
+ * with the throughput of every routine in tests[] for a fixed set of
+ * source/destination pixel format combinations.
+ */
+int main(int argc, char **argv)
+{
+ size_t bufsize;
+
+ time_t t;
+ char datebuffer[256];
+
+ size_t i;
+
+ // Four bytes for every pixel of a fbsize x fbsize buffer
+ bufsize = fbsize * fbsize * 4;
+
+ fb1 = new rdr::U8[bufsize];
+ fb2 = new rdr::U8[bufsize];
+
+ for (i = 0;i < bufsize;i++) {
+ fb1[i] = rand();
+ fb2[i] = rand();
+ }
+
+ time(&t);
+ strftime(datebuffer, sizeof(datebuffer), "%Y-%m-%d %H:%M UTC", gmtime(&t));
+
+ // Commented header describing the run
+ printf("# Pixel Conversion Performance Test %s\n", datebuffer);
+ printf("#\n");
+ printf("# Frame buffer: %dx%d pixels\n", fbsize, fbsize);
+ printf("# Tile size: %dx%d pixels\n", tile, tile);
+ printf("#\n");
+ printf("# Note: Results are Mpixels/sec\n");
+ printf("#\n");
+
+ // CSV column titles
+ printf("Source format,Destination Format");
+ for (i = 0;i < sizeof(tests)/sizeof(tests[0]);i++)
+ printf(",%s", tests[i].label);
+ printf("\n");
+
+ rfb::PixelFormat dstpf, srcpf;
+
+ /* rgb888 targets */
+
+ printf("\n");
+
+ dstpf.parse("rgb888");
+
+ srcpf.parse("rgb888");
+ doTests(dstpf, srcpf);
+
+ srcpf.parse("bgr888");
+ doTests(dstpf, srcpf);
+
+ srcpf.parse("rgb565");
+ doTests(dstpf, srcpf);
+
+ srcpf.parse("rgb232");
+ doTests(dstpf, srcpf);
+
+ /* rgb565 targets */
+
+ printf("\n");
+
+ dstpf.parse("rgb565");
+
+ srcpf.parse("rgb888");
+ doTests(dstpf, srcpf);
+
+ srcpf.parse("bgr565");
+ doTests(dstpf, srcpf);
+
+ srcpf.parse("rgb232");
+ doTests(dstpf, srcpf);
+
+ /* rgb232 targets */
+
+ printf("\n");
+
+ dstpf.parse("rgb232");
+
+ srcpf.parse("rgb888");
+ doTests(dstpf, srcpf);
+
+ srcpf.parse("rgb565");
+ doTests(dstpf, srcpf);
+
+ srcpf.parse("bgr232");
+ doTests(dstpf, srcpf);
+
+ /* Endian conversion (both ways), for a 32 bit and a 16 bit format */
+
+ printf("\n");
+
+ // NOTE(review): the two formats of each pair differ only in the third
+ // constructor argument, presumably the big endian flag -- confirm
+ dstpf = rfb::PixelFormat(32, 24, false, true, 255, 255, 255, 0, 8, 16);
+ srcpf = rfb::PixelFormat(32, 24, true, true, 255, 255, 255, 0, 8, 16);
+
+ // Arguments deliberately swapped here: srcpf acts as the destination
+ doTests(srcpf, dstpf);
+
+ doTests(dstpf, srcpf);
+
+ dstpf = rfb::PixelFormat(16, 16, false, true, 31, 63, 31, 0, 5, 11);
+ srcpf = rfb::PixelFormat(16, 16, true, true, 31, 63, 31, 0, 5, 11);
+
+ doTests(srcpf, dstpf);
+
+ doTests(dstpf, srcpf);
+
+ return 0;
+}
+
diff --git a/tests/perf/decperf.cxx b/tests/perf/decperf.cxx
new file mode 100644
index 00000000..df5214f2
--- /dev/null
+++ b/tests/perf/decperf.cxx
@@ -0,0 +1,240 @@
+/* Copyright 2015 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+/*
+ * This program reads files produced by TightVNC's/TurboVNC's
+ * compare-encodings. It is basically a dump of the RFB protocol
+ * from the server side from the ServerInit message and forward.
+ * It is assumed that the client is using a bgr888 (LE) pixel
+ * format.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include <rdr/Exception.h>
+#include <rdr/FileInStream.h>
+
+#include <rfb/CConnection.h>
+#include <rfb/CMsgReader.h>
+#include <rfb/PixelBuffer.h>
+#include <rfb/PixelFormat.h>
+
+#include "util.h"
+
+// FIXME: Files are always in this format
+static const rfb::PixelFormat filePF(32, 24, false, true, 255, 255, 255, 0, 8, 16);
+
+// Client connection that reads its data from a file instead of a server
+// and accumulates the CPU time spent on frame buffer updates
+class CConn : public rfb::CConnection {
+public:
+ CConn(const char *filename);
+ ~CConn();
+
+ virtual void initDone();
+ virtual void setPixelFormat(const rfb::PixelFormat& pf);
+ virtual void setCursor(int, int, const rfb::Point&, const rdr::U8*);
+ virtual void framebufferUpdateStart();
+ virtual void framebufferUpdateEnd();
+ virtual void setColourMapEntries(int, int, rdr::U16*);
+ virtual void bell();
+ virtual void serverCutText(const char*);
+
+public:
+ // Sum of the CPU counter readings of all updates so far
+ double cpuTime;
+
+protected:
+ // The file being decoded; owned by this object
+ rdr::FileInStream *in;
+};
+
+/*
+ * Opens the given dump file and prepares the connection for parsing it,
+ * starting at the initialisation state.
+ */
+CConn::CConn(const char *filename)
+  : cpuTime(0.0), in(new rdr::FileInStream(filename))
+{
+  // Input comes from the file; no output stream is attached
+  setStreams(in, NULL);
+
+  // The handshake isn't part of the file, so jump past it...
+  setState(RFBSTATE_INITIALISATION);
+  // ...which means we have to set up the reader ourselves
+  setReader(new rfb::CMsgReader(this, in));
+}
+
+// Releases the file stream allocated by the constructor
+CConn::~CConn()
+{
+ delete in;
+}
+
+// Creates a frame buffer in the file's pixel format, sized after the
+// dimensions reported by the server
+void CConn::initDone()
+{
+  rfb::ModifiablePixelBuffer *pb;
+
+  pb = new rfb::ManagedPixelBuffer(filePF, server.width(), server.height());
+  setFramebuffer(pb);
+}
+
+// Ignores the requested format and always applies filePF instead
+void CConn::setPixelFormat(const rfb::PixelFormat& pf)
+{
+ // Override format
+ CConnection::setPixelFormat(filePF);
+}
+
+// Cursor updates are ignored
+void CConn::setCursor(int, int, const rfb::Point&, const rdr::U8*)
+{
+}
+
+// Starts the CPU counter once the base class has handled the start of
+// the update
+void CConn::framebufferUpdateStart()
+{
+ CConnection::framebufferUpdateStart();
+
+ startCpuCounter();
+}
+
+// Stops the CPU counter after the base class has finished the update and
+// adds the reading to cpuTime
+void CConn::framebufferUpdateEnd()
+{
+ CConnection::framebufferUpdateEnd();
+
+ endCpuCounter();
+
+ cpuTime += getCpuCounter();
+}
+
+// Colour map updates are ignored
+void CConn::setColourMapEntries(int, int, rdr::U16*)
+{
+}
+
+// Bell messages are ignored
+void CConn::bell()
+{
+}
+
+// Clipboard messages are ignored
+void CConn::serverCutText(const char*)
+{
+}
+
+// Measurements from a single pass over the file
+struct stats
+{
+ // CPU counter total reported by the connection
+ double decodeTime;
+ // Wall clock time for the entire pass, in seconds
+ double realTime;
+};
+
+/*
+ * Decodes the given file once and returns the CPU counter total and the
+ * wall clock time of the pass. Terminates the program on any error.
+ */
+static struct stats runTest(const char *fn)
+{
+  struct timeval begin, end;
+  struct stats result;
+  CConn *conn;
+
+  gettimeofday(&begin, NULL);
+
+  try {
+    conn = new CConn(fn);
+  } catch (rdr::Exception& e) {
+    fprintf(stderr, "Failed to open rfb file: %s\n", e.str());
+    exit(1);
+  }
+
+  try {
+    // Keep processing until the stream reports that the file has ended
+    for (;;)
+      conn->processMsg();
+  } catch (rdr::EndOfStream& e) {
+  } catch (rdr::Exception& e) {
+    fprintf(stderr, "Failed to run rfb file: %s\n", e.str());
+    exit(1);
+  }
+
+  gettimeofday(&end, NULL);
+
+  result.decodeTime = conn->cpuTime;
+  result.realTime = ((double)end.tv_sec - begin.tv_sec) +
+                    ((double)end.tv_usec - begin.tv_usec)/1000000.0;
+
+  delete conn;
+
+  return result;
+}
+
+/* Sorts the first count elements of array in ascending order, in place */
+static void sort(double *array, int count)
+{
+  bool swapped = true;
+
+  // Bubble sort: sweep over the array until a full pass moves nothing
+  while (swapped) {
+    swapped = false;
+    for (int pos = 0; pos + 1 < count; pos++) {
+      if (array[pos] > array[pos + 1]) {
+        const double tmp = array[pos];
+        array[pos] = array[pos + 1];
+        array[pos + 1] = tmp;
+        swapped = true;
+      }
+    }
+  }
+}
+
+// Number of measured passes over the file (not counting the warmup)
+static const int runCount = 9;
+
+/*
+ * Sorts values in place and stores its median in *median and the median
+ * of the relative deviations from it, in percent, in *meddev. dev is
+ * scratch space with room for count entries.
+ */
+static void medianStats(double *values, double *dev, int count,
+                        double *median, double *meddev)
+{
+  int i;
+
+  sort(values, count);
+  *median = values[count/2];
+
+  for (i = 0;i < count;i++)
+    dev[i] = fabs((values[i] - *median) / *median) * 100;
+
+  sort(dev, count);
+  *meddev = dev[count/2];
+}
+
+/*
+ * Decodes the file named on the command line several times and prints
+ * the median CPU time and core usage. Returns 1 on a usage error.
+ */
+int main(int argc, char **argv)
+{
+  int i;
+  struct stats runs[runCount];
+  double values[runCount], dev[runCount];
+  double median, meddev;
+
+  if (argc != 2) {
+    printf("Syntax: %s <rfb file>\n", argv[0]);
+    return 1;
+  }
+
+  // Warmup
+  runTest(argv[1]);
+
+  // Multiple runs to get a good average
+  for (i = 0;i < runCount;i++)
+    runs[i] = runTest(argv[1]);
+
+  // Calculate median and median deviation for CPU usage
+  for (i = 0;i < runCount;i++)
+    values[i] = runs[i].decodeTime;
+
+  medianStats(values, dev, runCount, &median, &meddev);
+
+  printf("CPU time: %g s (+/- %g %%)\n", median, meddev);
+
+  // And for CPU core usage
+  for (i = 0;i < runCount;i++)
+    values[i] = runs[i].decodeTime / runs[i].realTime;
+
+  medianStats(values, dev, runCount, &median, &meddev);
+
+  printf("Core usage: %g (+/- %g %%)\n", median, meddev);
+
+  return 0;
+}
diff --git a/tests/perf/encperf.cxx b/tests/perf/encperf.cxx
new file mode 100644
index 00000000..e461197e
--- /dev/null
+++ b/tests/perf/encperf.cxx
@@ -0,0 +1,506 @@
+/* Copyright 2015 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2015 D. R. Commander. All Rights Reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+/*
+ * This program reads files produced by TightVNC's/TurboVNC's
+ * fbs-dump, which in turn takes files from rfbproxy. It is
+ * basically a dump of the RFB protocol from the server side after
+ * the ServerInit message. Mostly this consists of FramebufferUpdate
+ * messages using the Hextile encoding. Screen size and pixel format
+ * are not encoded in the file and must be specified by the user.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include <rdr/Exception.h>
+#include <rdr/OutStream.h>
+#include <rdr/FileInStream.h>
+
+#include <rfb/PixelFormat.h>
+
+#include <rfb/CConnection.h>
+#include <rfb/CMsgReader.h>
+#include <rfb/UpdateTracker.h>
+
+#include <rfb/EncodeManager.h>
+#include <rfb/SConnection.h>
+#include <rfb/SMsgWriter.h>
+
+#include "util.h"
+
+// Command line parameters; width, height and format have to be given
+// since they are not stored in the file
+static rfb::IntParameter width("width", "Frame buffer width", 0);
+static rfb::IntParameter height("height", "Frame buffer height", 0);
+static rfb::IntParameter count("count", "Number of benchmark iterations", 9);
+
+static rfb::StringParameter format("format", "Pixel format (e.g. bgr888)", "");
+
+static rfb::BoolParameter translate("translate",
+ "Translate 8-bit and 16-bit datasets into 24-bit",
+ true);
+
+// Format of the frame buffer (and output) when translation is enabled;
+// otherwise the format of the data set is used
+static const rfb::PixelFormat fbPF(32, 24, false, true, 255, 255, 255, 0, 8, 16);
+
+// Encodings to use
+static const rdr::S32 encodings[] = {
+ rfb::encodingTight, rfb::encodingCopyRect, rfb::encodingRRE,
+ rfb::encodingHextile, rfb::encodingZRLE, rfb::pseudoEncodingLastRect,
+ rfb::pseudoEncodingQualityLevel0 + 8,
+ rfb::pseudoEncodingCompressLevel0 + 2};
+
+// Output stream that throws away everything written to it and only
+// keeps track of how many bytes that was
+class DummyOutStream : public rdr::OutStream {
+public:
+ DummyOutStream();
+
+ virtual int length();
+ virtual void flush();
+
+private:
+ virtual int overrun(int itemSize, int nItems);
+
+ // Number of bytes discarded by flush() so far
+ int offset;
+ // Scratch area that the written data ends up in
+ rdr::U8 buf[131072];
+};
+
+// Client connection that decodes updates from a file and hands every
+// completed update over to an SConn to be encoded again
+class CConn : public rfb::CConnection {
+public:
+ CConn(const char *filename);
+ ~CConn();
+
+ // Forwards to the statistics of the encoding side
+ void getStats(double& ratio, unsigned long long& bytes,
+ unsigned long long& rawEquivalent);
+
+ virtual void initDone();
+ virtual void setCursor(int, int, const rfb::Point&, const rdr::U8*);
+ virtual void framebufferUpdateStart();
+ virtual void framebufferUpdateEnd();
+ virtual void dataRect(const rfb::Rect&, int);
+ virtual void setColourMapEntries(int, int, rdr::U16*);
+ virtual void bell();
+ virtual void serverCutText(const char*);
+
+public:
+ // Accumulated CPU counter readings for decoding and encoding
+ double decodeTime;
+ double encodeTime;
+
+protected:
+ // The file being read; owned by this object
+ rdr::FileInStream *in;
+ // Areas changed by the update currently being decoded
+ rfb::SimpleUpdateTracker updates;
+ // The encoding side; owned by this object
+ class SConn *sc;
+};
+
+// EncodeManager subclass that adds a way to read out summed statistics
+class Manager : public rfb::EncodeManager {
+public:
+ Manager(class rfb::SConnection *conn);
+
+ // Outputs, in order: ratio, encoded bytes, raw equivalent bytes
+ void getStats(double&, unsigned long long&, unsigned long long&);
+};
+
+// Server connection that encodes updates into a DummyOutStream
+class SConn : public rfb::SConnection {
+public:
+ SConn();
+ ~SConn();
+
+ // Encodes the given update using the contents of pb
+ void writeUpdate(const rfb::UpdateInfo& ui, const rfb::PixelBuffer* pb);
+
+ // Forwards to Manager::getStats()
+ void getStats(double&, unsigned long long&, unsigned long long&);
+
+ virtual void setAccessRights(AccessRights ar);
+
+ virtual void setDesktopSize(int fb_width, int fb_height,
+ const rfb::ScreenSet& layout);
+
+protected:
+ // Both are allocated in the constructor and owned by this object
+ DummyOutStream *out;
+ Manager *manager;
+};
+
+// Starts out with nothing counted and the entire buffer free
+DummyOutStream::DummyOutStream()
+  : offset(0)
+{
+  ptr = buf;
+  end = buf + sizeof(buf);
+}
+
+// Returns the total number of bytes written so far; pending data is
+// flushed first so that it is included in the count
+int DummyOutStream::length()
+{
+ flush();
+ return offset;
+}
+
+// Adds the buffered bytes to the running total and rewinds the write
+// pointer; the data itself is never sent anywhere
+void DummyOutStream::flush()
+{
+ offset += ptr - buf;
+ ptr = buf;
+}
+
+// Frees up the buffer and returns how many of the requested nItems
+// items, each of itemSize bytes, fit in it
+int DummyOutStream::overrun(int itemSize, int nItems)
+{
+  flush();
+
+  // The whole request fits
+  if (itemSize * nItems <= end - ptr)
+    return nItems;
+
+  // Otherwise only as many whole items as there is room for
+  return (end - ptr) / itemSize;
+}
+
+/*
+ * Opens the given dump file, configures the decoding side from the
+ * width, height and format parameters, and creates the encoding side.
+ */
+CConn::CConn(const char *filename)
+  : decodeTime(0.0), encodeTime(0.0)
+{
+  rfb::PixelFormat pf;
+
+  in = new rdr::FileInStream(filename);
+  setStreams(in, NULL);
+
+  // The file starts after the handshake and ServerInit, so go
+  // straight to the normal state...
+  setState(RFBSTATE_NORMAL);
+  // ...and set up the reader that would otherwise have been created
+  setReader(new rfb::CMsgReader(this, in));
+
+  // Frame buffer format and size aren't in the file either
+  // NOTE(review): the result of parse() is not checked here -- verify
+  // that an invalid format string is rejected somewhere
+  pf.parse(format);
+  setPixelFormat(pf);
+  setDesktopSize(width, height);
+
+  // The encoder outputs either the fixed format or the file's own
+  sc = new SConn();
+  sc->client.setPF((bool)translate ? fbPF : pf);
+  sc->setEncodings(sizeof(encodings) / sizeof(*encodings), encodings);
+}
+
+// Releases the encoding side and the file stream
+CConn::~CConn()
+{
+ delete sc;
+ delete in;
+}
+
+// Fetches the encoder statistics from the SConn
+void CConn::getStats(double& ratio, unsigned long long& bytes,
+ unsigned long long& rawEquivalent)
+{
+ sc->getStats(ratio, bytes, rawEquivalent);
+}
+
+// Creates the frame buffer, either in the fixed format or in the
+// server's own format depending on the translate parameter
+void CConn::initDone()
+{
+  const rfb::PixelFormat &pf = (bool)translate ? fbPF : server.pf();
+
+  setFramebuffer(new rfb::ManagedPixelBuffer(pf, server.width(),
+                                             server.height()));
+}
+
+// Cursor updates are ignored
+void CConn::setCursor(int, int, const rfb::Point&, const rdr::U8*)
+{
+}
+
+// Forgets the changes of the previous update and starts timing the
+// decoding of the new one
+void CConn::framebufferUpdateStart()
+{
+ CConnection::framebufferUpdateStart();
+
+ updates.clear();
+ startCpuCounter();
+}
+
+// Finishes timing the decoding of the update and then encodes the
+// changed areas again, timing that separately
+void CConn::framebufferUpdateEnd()
+{
+ rfb::UpdateInfo ui;
+ rfb::PixelBuffer* pb = getFramebuffer();
+ rfb::Region clip(pb->getRect());
+
+ CConnection::framebufferUpdateEnd();
+
+ endCpuCounter();
+
+ decodeTime += getCpuCounter();
+
+ // Collecting the changes happens outside of both measurements
+ updates.getUpdateInfo(&ui, clip);
+
+ startCpuCounter();
+ sc->writeUpdate(ui, pb);
+ endCpuCounter();
+
+ encodeTime += getCpuCounter();
+}
+
+// Lets the base class handle the rectangle and records it as changed,
+// except for CopyRect rectangles which are currently not tracked
+void CConn::dataRect(const rfb::Rect &r, int encoding)
+{
+ CConnection::dataRect(r, encoding);
+
+ if (encoding != rfb::encodingCopyRect) // FIXME
+ updates.add_changed(rfb::Region(r));
+}
+
+// Colour map updates are ignored
+void CConn::setColourMapEntries(int, int, rdr::U16*)
+{
+}
+
+// Bell messages are ignored
+void CConn::bell()
+{
+}
+
+// Clipboard messages are ignored
+void CConn::serverCutText(const char*)
+{
+}
+
+// Passes the connection straight on to EncodeManager
+Manager::Manager(class rfb::SConnection *conn) :
+ EncodeManager(conn)
+{
+}
+
+/*
+ * Adds up the bytes and equivalent fields of every entry in stats and
+ * reports the totals together with equivalent divided by bytes.
+ */
+void Manager::getStats(double& ratio, unsigned long long& encodedBytes,
+                       unsigned long long& rawEquivalent)
+{
+  unsigned long long totalBytes = 0;
+  unsigned long long totalEquivalent = 0;
+
+  // stats is a container of containers; visit every inner entry
+  for (StatsVector::iterator outer = stats.begin();
+       outer != stats.end(); ++outer) {
+    for (StatsVector::value_type::iterator inner = outer->begin();
+         inner != outer->end(); ++inner) {
+      totalBytes += inner->bytes;
+      totalEquivalent += inner->equivalent;
+    }
+  }
+
+  ratio = (double)totalEquivalent / totalBytes;
+  encodedBytes = totalBytes;
+  rawEquivalent = totalEquivalent;
+}
+
+// Sets up the connection with a dummy output stream, a message writer
+// on top of that stream, and the encode manager
+SConn::SConn()
+{
+ out = new DummyOutStream;
+ setStreams(NULL, out);
+
+ setWriter(new rfb::SMsgWriter(&client, out));
+
+ manager = new Manager(this);
+}
+
+// Releases the encode manager and the output stream
+SConn::~SConn()
+{
+ delete manager;
+ delete out;
+}
+
+// Hands the update over to the encode manager; the last argument is
+// always NULL here
+void SConn::writeUpdate(const rfb::UpdateInfo& ui, const rfb::PixelBuffer* pb)
+{
+ manager->writeUpdate(ui, pb, NULL);
+}
+
+// Fetches the statistics from the encode manager
+void SConn::getStats(double& ratio, unsigned long long& bytes,
+ unsigned long long& rawEquivalent)
+{
+ manager->getStats(ratio, bytes, rawEquivalent);
+}
+
+// Access rights are ignored
+void SConn::setAccessRights(AccessRights ar)
+{
+}
+
+// Desktop size changes are ignored
+void SConn::setDesktopSize(int fb_width, int fb_height,
+ const rfb::ScreenSet& layout)
+{
+}
+
+// Measurements from a single pass over the file
+struct stats
+{
+ // CPU counter totals reported by the connection
+ double decodeTime;
+ double encodeTime;
+ // Wall clock time for the entire pass, in seconds
+ double realTime;
+
+ // Encoder statistics as reported by CConn::getStats()
+ double ratio;
+ unsigned long long bytes;
+ unsigned long long rawEquivalent;
+};
+
+/*
+ * Runs the given file through the decoder and encoder once and returns
+ * the timings and encoder statistics of that pass. Terminates the
+ * program on any error.
+ */
+static struct stats runTest(const char *fn)
+{
+  struct timeval begin, end;
+  struct stats result;
+  CConn *conn;
+
+  gettimeofday(&begin, NULL);
+
+  try {
+    conn = new CConn(fn);
+  } catch (rdr::Exception& e) {
+    fprintf(stderr, "Failed to open rfb file: %s\n", e.str());
+    exit(1);
+  }
+
+  try {
+    // Keep processing until the stream reports that the file has ended
+    for (;;)
+      conn->processMsg();
+  } catch (rdr::EndOfStream& e) {
+  } catch (rdr::Exception& e) {
+    fprintf(stderr, "Failed to run rfb file: %s\n", e.str());
+    exit(1);
+  }
+
+  gettimeofday(&end, NULL);
+
+  result.decodeTime = conn->decodeTime;
+  result.encodeTime = conn->encodeTime;
+  result.realTime = ((double)end.tv_sec - begin.tv_sec) +
+                    ((double)end.tv_usec - begin.tv_usec)/1000000.0;
+  conn->getStats(result.ratio, result.bytes, result.rawEquivalent);
+
+  delete conn;
+
+  return result;
+}
+
+/* Sorts the first count elements of array in ascending order, in place */
+static void sort(double *array, int count)
+{
+  // Bubble sort: repeat the sweep for as long as it had to swap something
+  for (bool swapped = true; swapped; ) {
+    swapped = false;
+    for (int next = 1; next < count; next++) {
+      double *lo = &array[next - 1];
+      double *hi = &array[next];
+
+      if (*lo > *hi) {
+        const double tmp = *hi;
+        *hi = *lo;
+        *lo = tmp;
+        swapped = true;
+      }
+    }
+  }
+}
+
+// Prints the command line syntax and the available parameters, then
+// terminates the program with exit status 1
+static void usage(const char *argv0)
+{
+ fprintf(stderr, "Syntax: %s [options] <rfb file>\n", argv0);
+ fprintf(stderr, "Options:\n");
+ rfb::Configuration::listParams(79, 14);
+ exit(1);
+}
+
+/*
+ * Sorts values in place and stores its median in *median and the median
+ * of the relative deviations from it, in percent, in *meddev. dev is
+ * scratch space with room for count entries.
+ */
+static void medianStats(double *values, double *dev, int count,
+                        double *median, double *meddev)
+{
+  int i;
+
+  sort(values, count);
+  *median = values[count/2];
+
+  for (i = 0;i < count;i++)
+    dev[i] = fabs((values[i] - *median) / *median) * 100;
+
+  sort(dev, count);
+  *meddev = dev[count/2];
+}
+
+/*
+ * Parses the command line, runs the file through the decoder and
+ * encoder the requested number of times and prints median timings plus
+ * the encoder statistics of the first measured run.
+ */
+int main(int argc, char **argv)
+{
+  int i;
+
+  const char *fn;
+
+  fn = NULL;
+  for (i = 1; i < argc; i++) {
+    // "name=value" style parameter
+    if (rfb::Configuration::setParam(argv[i]))
+      continue;
+
+    // "-name value" style parameter
+    if (argv[i][0] == '-') {
+      if (i + 1 < argc) {
+        if (rfb::Configuration::setParam(&argv[i][1], argv[i + 1])) {
+          i++;
+          continue;
+        }
+      }
+      usage(argv[0]);
+    }
+
+    // Anything else is the file name, which may only be given once
+    if (fn != NULL)
+      usage(argv[0]);
+
+    fn = argv[i];
+  }
+
+  if (fn == NULL) {
+    fprintf(stderr, "No file specified!\n\n");
+    usage(argv[0]);
+  }
+
+  if (strcmp(format, "") == 0) {
+    fprintf(stderr, "Pixel format not specified!\n\n");
+    usage(argv[0]);
+  }
+
+  if (width == 0 || height == 0) {
+    fprintf(stderr, "Frame buffer size not specified!\n\n");
+    usage(argv[0]);
+  }
+
+  // The statistics below read element runCount/2 and runs[0], so there
+  // has to be at least one measured run
+  int runCount = count;
+  if (runCount < 1) {
+    fprintf(stderr, "Iteration count must be at least 1!\n\n");
+    usage(argv[0]);
+  }
+
+  // Allocate only once all the arguments have been validated
+  struct stats *runs = new struct stats[runCount];
+  double *values = new double[runCount];
+  double *dev = new double[runCount];
+  double median, meddev;
+
+  // Warmup
+  runTest(fn);
+
+  // Multiple runs to get a good average
+  for (i = 0; i < runCount; i++)
+    runs[i] = runTest(fn);
+
+  // Calculate median and median deviation for CPU usage decoding
+  for (i = 0;i < runCount;i++)
+    values[i] = runs[i].decodeTime;
+
+  medianStats(values, dev, runCount, &median, &meddev);
+
+  printf("CPU time (decoding): %g s (+/- %g %%)\n", median, meddev);
+
+  // And for CPU usage encoding
+  for (i = 0;i < runCount;i++)
+    values[i] = runs[i].encodeTime;
+
+  medianStats(values, dev, runCount, &median, &meddev);
+
+  printf("CPU time (encoding): %g s (+/- %g %%)\n", median, meddev);
+
+  // And for CPU core usage encoding
+  for (i = 0;i < runCount;i++)
+    values[i] = (runs[i].decodeTime + runs[i].encodeTime) / runs[i].realTime;
+
+  medianStats(values, dev, runCount, &median, &meddev);
+
+  printf("Core usage (total): %g (+/- %g %%)\n", median, meddev);
+
+  // The byte counters are unsigned, so use unsigned conversions
+#ifdef WIN32
+  printf("Encoded bytes: %I64u\n", runs[0].bytes);
+  printf("Raw equivalent bytes: %I64u\n", runs[0].rawEquivalent);
+#else
+  printf("Encoded bytes: %llu\n", runs[0].bytes);
+  printf("Raw equivalent bytes: %llu\n", runs[0].rawEquivalent);
+#endif
+  printf("Ratio: %g\n", runs[0].ratio);
+
+  delete [] dev;
+  delete [] values;
+  delete [] runs;
+
+  return 0;
+}
diff --git a/tests/perf/fbperf.cxx b/tests/perf/fbperf.cxx
new file mode 100644
index 00000000..a19ee479
--- /dev/null
+++ b/tests/perf/fbperf.cxx
@@ -0,0 +1,399 @@
+/* Copyright 2016 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <math.h>
+#include <sys/time.h>
+
+#include <FL/Fl.H>
+#include <FL/Fl_Window.H>
+#include <FL/fl_draw.H>
+
+#include <rdr/Exception.h>
+#include <rfb/util.h>
+
+#include "../vncviewer/PlatformPixelBuffer.h"
+
+#include "util.h"
+
+// Window that redraws itself from a frame buffer over and over while
+// keeping track of the pixels and frames drawn and the time spent
+class TestWindow: public Fl_Window {
+public:
+ TestWindow();
+ ~TestWindow();
+
+ // Resizes to the given size, resets the counters and shows the window
+ virtual void start(int width, int height);
+ // Hides the window and releases the frame buffer
+ virtual void stop();
+
+ virtual void draw();
+
+protected:
+ virtual void flush();
+
+ void update();
+ // Modifies the frame buffer contents ahead of the next redraw
+ virtual void changefb();
+
+ // Idle callback; data is the TestWindow to update
+ static void timer(void* data);
+
+public:
+ // Totals accumulated since the last start()
+ unsigned long long pixels, frames;
+ double time;
+
+protected:
+ // NULL whenever the test is not running
+ PlatformPixelBuffer* fb;
+};
+
+// Variant that only changes the middle part of the frame buffer between
+// redraws
+class PartialTestWindow: public TestWindow {
+protected:
+ virtual void changefb();
+};
+
+// Variant of the partial update test that also blends a semi-transparent
+// overlay surface on top of the frame buffer
+class OverlayTestWindow: public PartialTestWindow {
+public:
+  OverlayTestWindow();
+  // Release the surfaces here; by the time the base class destructor
+  // calls stop() that call no longer reaches OverlayTestWindow::stop()
+  ~OverlayTestWindow() { stop(); }
+
+  virtual void start(int width, int height);
+  virtual void stop();
+
+  virtual void draw();
+
+protected:
+  // Both are NULL whenever the test is not running; offscreen also
+  // stays NULL on platforms that don't use an off screen buffer
+  Surface* overlay;
+  Surface* offscreen;
+};
+
+// Creates a zero sized window without a frame buffer; start() gives it
+// its real size
+TestWindow::TestWindow() :
+ Fl_Window(0, 0, "Framebuffer Performance Test"),
+ fb(NULL)
+{
+}
+
+// Makes sure the frame buffer is released and the idle callback removed
+TestWindow::~TestWindow()
+{
+ stop();
+}
+
+// Restarts the test at the given window size with all counters cleared
+// and a freshly allocated, zero filled frame buffer
+void TestWindow::start(int width, int height)
+{
+  // Get rid of anything left over from an earlier run
+  stop();
+
+  resize(x(), y(), width, height);
+
+  time = 0;
+  frames = 0;
+  pixels = 0;
+
+  fb = new PlatformPixelBuffer(w(), h());
+
+  rdr::U32 zero = 0;
+  fb->fillRect(fb->getRect(), &zero);
+
+  show();
+}
+
+// Hides the window, releases the frame buffer and removes any pending
+// idle callback
+void TestWindow::stop()
+{
+ hide();
+
+ delete fb;
+ fb = NULL;
+
+ Fl::remove_idle(timer, this);
+}
+
+// Draws the part of the frame buffer inside the current clip region,
+// updates the pixel and frame counters, and schedules the next change
+// of the frame buffer
+void TestWindow::draw()
+{
+ int X, Y, W, H;
+
+ // We cannot update the damage region from inside the draw function,
+ // so delegate this to an idle function
+ Fl::add_idle(timer, this);
+
+ // Check what actually needs updating
+ fl_clip_box(0, 0, w(), h(), X, Y, W, H);
+ if ((W == 0) || (H == 0))
+ return;
+
+ fb->draw(X, Y, X, Y, W, H);
+
+ pixels += W*H;
+ frames++;
+}
+
+// Performs the regular flush and adds the time it took to the total
+void TestWindow::flush()
+{
+ startTimeCounter();
+ Fl_Window::flush();
+#if !defined(WIN32) && !defined(__APPLE__)
+ // Make sure we measure any work we queue up
+ XSync(fl_display, False);
+#endif
+ endTimeCounter();
+
+ time += getTimeCounter();
+}
+
+// Changes the frame buffer, marks the area reported by getDamage() as
+// needing a redraw, and adds the time all of that took to the total
+void TestWindow::update()
+{
+ rfb::Rect r;
+
+ startTimeCounter();
+
+ changefb();
+
+ r = fb->getDamage();
+ damage(FL_DAMAGE_USER1, r.tl.x, r.tl.y, r.width(), r.height());
+
+#if !defined(WIN32) && !defined(__APPLE__)
+ // Make sure we measure any work we queue up
+ XSync(fl_display, False);
+#endif
+
+ endTimeCounter();
+
+ time += getTimeCounter();
+}
+
+// Fills the entire frame buffer with a random pixel value
+void TestWindow::changefb()
+{
+  rdr::U32 value = rand();
+
+  fb->fillRect(fb->getRect(), &value);
+}
+
+// Idle callback that unregisters itself and then updates the window
+// passed in data
+void TestWindow::timer(void* data)
+{
+  TestWindow* win = (TestWindow*)data;
+
+  // One shot: draw() registers the callback again when needed
+  Fl::remove_idle(timer, data);
+
+  win->update();
+}
+
+// Fills the frame buffer, minus a margin of a quarter of the window
+// size along every edge, with a random pixel value
+void PartialTestWindow::changefb()
+{
+  const int marginX = w() / 4;
+  const int marginY = h() / 4;
+
+  rfb::Rect area = fb->getRect();
+  area.tl.x += marginX;
+  area.tl.y += marginY;
+  area.br.x -= marginX;
+  area.br.y -= marginY;
+
+  rdr::U32 value = rand();
+  fb->fillRect(area, &value);
+}
+
+// Starts out without any surfaces; start() creates them
+OverlayTestWindow::OverlayTestWindow() :
+ overlay(NULL), offscreen(NULL)
+{
+}
+
+// Starts the base test and then creates the 400x200 overlay surface
+// and, except on macOS, a window sized off screen surface
+void OverlayTestWindow::start(int width, int height)
+{
+ PartialTestWindow::start(width, height);
+
+ overlay = new Surface(400, 200);
+ overlay->clear(0xff, 0x80, 0x00, 0xcc);
+
+ // X11 needs an off screen buffer for compositing to avoid flicker,
+ // and alpha blending doesn't work for windows on Win32
+#if !defined(__APPLE__)
+ offscreen = new Surface(w(), h());
+#else
+ offscreen = NULL;
+#endif
+}
+
+// Stops the base test and releases both surfaces
+void OverlayTestWindow::stop()
+{
+ PartialTestWindow::stop();
+
+ delete offscreen;
+ offscreen = NULL;
+ delete overlay;
+ overlay = NULL;
+}
+
+// Draws the frame buffer and blends the overlay on top of it, going via
+// the off screen surface when there is one
+void OverlayTestWindow::draw()
+{
+ int ox, oy, ow, oh;
+ int X, Y, W, H;
+
+ // We cannot update the damage region from inside the draw function,
+ // so delegate this to an idle function
+ Fl::add_idle(timer, this);
+
+ // Check what actually needs updating
+ fl_clip_box(0, 0, w(), h(), X, Y, W, H);
+ if ((W == 0) || (H == 0))
+ return;
+
+ // We might get a redraw before we are fully ready
+ if (!overlay)
+ return;
+
+ // Simplify the clip region to a simple rectangle in order to
+ // properly draw all the layers even if they only partially overlap
+ fl_push_no_clip();
+ fl_push_clip(X, Y, W, H);
+
+ if (offscreen)
+ fb->draw(offscreen, X, Y, X, Y, W, H);
+ else
+ fb->draw(X, Y, X, Y, W, H);
+
+ pixels += W*H;
+ frames++;
+
+ // The overlay is centred horizontally, with its centre a quarter of
+ // the window height from the top
+ ox = (w() - overlay->width()) / 2;
+ oy = h() / 4 - overlay->height() / 2;
+ ow = overlay->width();
+ oh = overlay->height();
+ fl_clip_box(ox, oy, ow, oh, X, Y, W, H);
+ if ((W != 0) && (H != 0)) {
+ if (offscreen)
+ overlay->blend(offscreen, X - ox, Y - oy, X, Y, W, H);
+ else
+ overlay->blend(X - ox, Y - oy, X, Y, W, H);
+ }
+
+ // One pop for each of the two pushes above
+ fl_pop_clip();
+ fl_pop_clip();
+
+ // Finally copy the composed result to the window
+ if (offscreen) {
+ fl_clip_box(0, 0, w(), h(), X, Y, W, H);
+ offscreen->draw(X, Y, X, Y, W, H);
+ }
+}
+
+// Runs the window at the given size for three seconds and reports the
+// counters it accumulated during that time
+static void dosubtest(TestWindow* win, int width, int height,
+                      unsigned long long* pixels,
+                      unsigned long long* frames,
+                      double* time)
+{
+  struct timeval began;
+
+  win->start(width, height);
+
+  // Let FLTK process events until the time is up
+  for (gettimeofday(&began, NULL); rfb::msSince(&began) < 3000; )
+    Fl::wait();
+
+  win->stop();
+
+  *pixels = win->pixels;
+  *frames = win->frames;
+  *time = win->time;
+}
+
+// Tells whether b deviates from a by less than 10% of a
+static bool is_constant(double a, double b)
+{
+  const double relative = fabs(a - b) / a;
+
+  return relative < 0.1;
+}
+
+// Runs the given window at three sizes and prints the estimated fixed
+// delay per frame, the pixel rate and the resulting maximum frame rate
+static void dotest(TestWindow* win)
+{
+ unsigned long long pixels[3];
+ unsigned long long frames[3];
+ double time[3];
+
+ double delay, rate;
+ char s[1024];
+
+ // Run the test several times at different resolutions...
+ dosubtest(win, 800, 600, &pixels[0], &frames[0], &time[0]);
+ dosubtest(win, 1024, 768, &pixels[1], &frames[1], &time[1]);
+ dosubtest(win, 1280, 960, &pixels[2], &frames[2], &time[2]);
+
+ // ...in order to compute how much of the rendering time is static,
+ // and how much depends on the number of pixels
+ // (i.e. solve: time = delay * frames + rate * pixels)
+ // Each unknown is solved for the pairs (0,1) and (1,2) and the two
+ // solutions are averaged
+ delay = (((time[0] - (double)pixels[0] / pixels[1] * time[1]) /
+ (frames[0] - (double)pixels[0] / pixels[1] * frames[1])) +
+ ((time[1] - (double)pixels[1] / pixels[2] * time[2]) /
+ (frames[1] - (double)pixels[1] / pixels[2] * frames[2]))) / 2.0;
+ rate = (((time[0] - (double)frames[0] / frames[1] * time[1]) /
+ (pixels[0] - (double)frames[0] / frames[1] * pixels[1])) +
+ ((time[1] - (double)frames[1] / frames[2] * time[2]) /
+ (pixels[1] - (double)frames[1] / frames[2] * pixels[2]))) / 2.0;
+
+ // However, we have some corner cases:
+
+ // We are restricted by some delay, e.g. refresh rate
+ if (is_constant(frames[0]/time[0], frames[2]/time[2])) {
+ fprintf(stderr, "WARNING: Fixed delay dominating updates.\n\n");
+ delay = time[2]/frames[2];
+ rate = 0.0;
+ }
+
+ // There isn't any fixed delay, we are only restricted by pixel
+ // throughput
+ if (fabs(delay) < 0.001) {
+ delay = 0.0;
+ rate = time[2]/pixels[2];
+ }
+
+ // We can hit cache limits that causes performance to drop
+ // with increasing update size, screwing up our calculations
+ if ((pixels[2] / time[2]) < (pixels[0] / time[0] * 0.9)) {
+ fprintf(stderr, "WARNING: Unexpected behaviour. Measurement unreliable.\n\n");
+
+ // We can't determine the proportions between these, so divide the
+ // time spent evenly
+ delay = time[2] / 2.0 / frames[2];
+ rate = time[2] / 2.0 / pixels[2];
+ }
+
+ fprintf(stderr, "Rendering delay: %g ms/frame\n", delay * 1000.0);
+ // A rate of exactly zero is only set by the fixed delay case above
+ if (rate == 0.0)
+ strcpy(s, "N/A pixels/s");
+ else
+ rfb::siPrefix(1.0 / rate, "pixels/s", s, sizeof(s));
+ fprintf(stderr, "Rendering rate: %s\n", s);
+ fprintf(stderr, "Maximum FPS: %g fps @ 1920x1080\n",
+ 1.0 / (delay + rate * 1920 * 1080));
+}
+
+// Runs the three test variants one after the other, each with a window
+// of its own
+int main(int argc, char** argv)
+{
+  fprintf(stderr, "Full window update:\n\n");
+  TestWindow* full = new TestWindow();
+  dotest(full);
+  delete full;
+  fprintf(stderr, "\n");
+
+  fprintf(stderr, "Partial window update:\n\n");
+  TestWindow* partial = new PartialTestWindow();
+  dotest(partial);
+  delete partial;
+  fprintf(stderr, "\n");
+
+  fprintf(stderr, "Partial window update with overlay:\n\n");
+  TestWindow* overlaid = new OverlayTestWindow();
+  dotest(overlaid);
+  delete overlaid;
+  fprintf(stderr, "\n");
+
+  return 0;
+}
diff --git a/tests/perf/results/multicore/README b/tests/perf/results/multicore/README
new file mode 100644
index 00000000..c93b2d7a
--- /dev/null
+++ b/tests/perf/results/multicore/README
@@ -0,0 +1,40 @@
+This directory contains the evaluation of the multi-core implementation
+in the decoder. The baseline is the performance before the addition of
+the DecodeManager class.
+
+Tests were performed on the following systems:
+
+ - eLux RP Atom N270 1.6 GHz
+ - Lubuntu 13.10 i.MX6 Quad 1.2 GHz
+ - Fedora 22 i7-3770 3.4 GHz
+ - Windows Vista Core 2 Duo E7400 2.8 GHz
+ - Windows 10 i3-4170 3.7 GHz
+ - OS X 10.6 Core 2 Duo 2.53 GHz
+ - OS X 10.11 i5 2.3 GHz
+
+The systems were tested with:
+
+ a) The old, baseline code
+ b) The new code with all CPUs enabled
+ c) The new code with only one CPU enabled
+
+The test itself consists of running decperf on the test files from the
+TurboVNC project. Rate of decoding is then compared to the baseline.
+Note that the CPU time is divided by core usage in the multi CPU cases
+in order to derive total decoding time. This method is sensitive to
+other load on the system.
+
+On average, there is no regression in performance for single CPU
+systems. This however relies on the addition of the single CPU shortcut
+in DecodeManager. Without that the performance sees a 10% lower rate.
+
+Dual CPU systems see between 20% and 50% increase, and the quad core
+systems between 75% and 125% on average. OS X is an outlier though in
+that it gets a mere 32% increase on average. It is unknown why at this
+point and tracing doesn't reveal anything obvious. It may be because it
+is not a true quad core system, but rather uses HyperThreading.
+
+So in summary, the new code can give a noticeable improvement in decoding
+time. However it does so at a cost of efficiency. Four times the CPUs
+only gives you about twice the performance. More improvements may be
+possible.
diff --git a/tests/perf/results/multicore/multicore.ods b/tests/perf/results/multicore/multicore.ods
new file mode 100644
index 00000000..42e024d6
--- /dev/null
+++ b/tests/perf/results/multicore/multicore.ods
Binary files differ
diff --git a/tests/perf/results/notrans/README b/tests/perf/results/notrans/README
new file mode 100644
index 00000000..3723e67d
--- /dev/null
+++ b/tests/perf/results/notrans/README
@@ -0,0 +1,28 @@
+This directory contains the test results in preparation for the removal
+of the PixelTransformer class.
+
+Tests were performed on Linux with these CPUs:
+
+ - Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz
+ - ARM i.MX6 DualLite @ 1 GHz
+
+The tests show that the new bufferFromBuffer() has similar performance
+to PixelTransformer in most cases. It gets beaten in two cases:
+
+ - Input format is 16 BPP and PixelTransformer is in the
+ non-economical mode (the default mode).
+
+ - Input format is 8 BPP, mode irrelevant.
+
+PixelTransformer is about twice as fast in both these cases (more if
+converting between two low colour formats).
+
+Although this is significant, it is in cases that are already difficult
+to deal with performance wise, and exceedingly rare with modern
+hardware. As such it is difficult to motivate the extra complexity that
+PixelTransformer requires.
+
+If it turns out that these cases are significant, then we can move
+PixelTransformer's massive lookup tables into a shared cache in
+PixelFormat. Implementation complexity would be similar, but at least
+we would have a friendly API.
diff --git a/tests/perf/results/notrans/armhf.csv b/tests/perf/results/notrans/armhf.csv
new file mode 100644
index 00000000..3ccb6d9b
--- /dev/null
+++ b/tests/perf/results/notrans/armhf.csv
@@ -0,0 +1,26 @@
+# Pixel Conversion Test 2014-07-09 14:23 UTC
+#
+# Frame buffer: 4096x4096 pixels
+# Tile size: 64x64 pixels
+#
+# Note: Results are Mpixels/sec
+#
+Source format,Destination Format,memcpy,PixelTransformer,bufferFromBuffer,rgbFromBuffer,bufferFromRGB
+
+depth 24 (32bpp) little-endian rgb888,depth 24 (32bpp) little-endian rgb888,67.5908,67.3684,66.8189,54.3236,59.1908
+depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) little-endian rgb888,67.5908,33.2468,45.2097,54.3236,58.8506
+depth 16 (16bpp) little-endian rgb565,depth 24 (32bpp) little-endian rgb888,62.0606,24.6896,16.8076,22.5924,59.5349
+depth 7 (8bpp) rgb232,depth 24 (32bpp) little-endian rgb888,61.6867,63.2099,19.5887,23.5132,59.7956
+
+depth 24 (32bpp) little-endian rgb888,depth 16 (16bpp) little-endian rgb565,96.8321,31.5562,40.3945,54.3236,25.0673
+depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) little-endian rgb565,104.757,31.7766,13.3725,22.5303,25.1443
+depth 7 (8bpp) rgb232,depth 16 (16bpp) little-endian rgb565,95.9251,61.594,15.1535,23.5132,25.098
+
+depth 24 (32bpp) little-endian rgb888,depth 7 (8bpp) rgb232,133.42,34.5654,32.6115,54.3236,33.0056
+depth 16 (16bpp) little-endian rgb565,depth 7 (8bpp) rgb232,133.42,42.1833,15.7842,22.5303,32.7942
+depth 7 (8bpp) bgr232,depth 7 (8bpp) rgb232,137.45,75.7116,16.8699,23.5132,33.0056
+
+depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) big-endian bgr888,67.5908,33.2738,45.2597,54.3957,58.9353
+depth 24 (32bpp) big-endian bgr888,depth 24 (32bpp) little-endian bgr888,67.7025,NaN,45.3097,54.1799,59.0202
+depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) big-endian bgr565,105.567,31.7766,13.8425,21.49,24.9756
+depth 16 (16bpp) big-endian bgr565,depth 16 (16bpp) little-endian bgr565,105.84,NaN,12.3746,20.9086,25.1443
diff --git a/tests/perf/results/notrans/i386.csv b/tests/perf/results/notrans/i386.csv
new file mode 100644
index 00000000..bb9247d3
--- /dev/null
+++ b/tests/perf/results/notrans/i386.csv
@@ -0,0 +1,26 @@
+# Pixel Conversion Test 2014-07-09 14:14 UTC
+#
+# Frame buffer: 4096x4096 pixels
+# Tile size: 64x64 pixels
+#
+# Note: Results are Mpixels/sec
+#
+Source format,Destination Format,memcpy,PixelTransformer,bufferFromBuffer,rgbFromBuffer,bufferFromRGB
+
+depth 24 (32bpp) little-endian rgb888,depth 24 (32bpp) little-endian rgb888,568.889,561.096,602.353,338.512,525.128
+depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) little-endian rgb888,561.096,215.579,280.548,338.512,525.128
+depth 16 (16bpp) little-endian rgb565,depth 24 (32bpp) little-endian rgb888,602.353,405.545,185.339,146.81,531.948
+depth 7 (8bpp) rgb232,depth 24 (32bpp) little-endian rgb888,640,531.948,192.3,170.667,546.133
+
+depth 24 (32bpp) little-endian rgb888,depth 16 (16bpp) little-endian rgb565,853.333,217.872,235.402,344.202,256
+depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) little-endian rgb565,871.489,455.111,89.4323,146.81,256
+depth 7 (8bpp) rgb232,depth 16 (16bpp) little-endian rgb565,952.558,568.889,94.8148,167.184,251.288
+
+depth 24 (32bpp) little-endian rgb888,depth 7 (8bpp) rgb232,1107.03,235.402,231.412,341.333,278.639
+depth 16 (16bpp) little-endian rgb565,depth 7 (8bpp) rgb232,1137.78,481.882,95.2558,144.735,276.757
+depth 7 (8bpp) bgr232,depth 7 (8bpp) rgb232,1204.71,553.514,101.136,169.256,280.548
+
+depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) big-endian bgr888,568.889,212.228,276.757,338.512,525.128
+depth 24 (32bpp) big-endian bgr888,depth 24 (32bpp) little-endian bgr888,576.901,NaN,278.639,338.512,525.128
+depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) big-endian bgr565,890.435,455.111,89.4323,146.81,243.81
+depth 16 (16bpp) big-endian bgr565,depth 16 (16bpp) little-endian bgr565,871.489,NaN,85.3333,146.286,254.41
diff --git a/tests/perf/results/notrans/x86_64.csv b/tests/perf/results/notrans/x86_64.csv
new file mode 100644
index 00000000..18fc03f3
--- /dev/null
+++ b/tests/perf/results/notrans/x86_64.csv
@@ -0,0 +1,26 @@
+# Pixel Conversion Test 2014-07-09 14:14 UTC
+#
+# Frame buffer: 4096x4096 pixels
+# Tile size: 64x64 pixels
+#
+# Note: Results are Mpixels/sec
+#
+Source format,Destination Format,memcpy,PixelTransformer,bufferFromBuffer,rgbFromBuffer,bufferFromRGB
+
+depth 24 (32bpp) little-endian rgb888,depth 24 (32bpp) little-endian rgb888,576.901,576.901,546.133,338.512,602.353
+depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) little-endian rgb888,585.143,251.288,288.451,335.738,602.353
+depth 16 (16bpp) little-endian rgb565,depth 24 (32bpp) little-endian rgb888,585.143,405.545,205.829,162.54,620.606
+depth 7 (8bpp) rgb232,depth 24 (32bpp) little-endian rgb888,493.494,505.679,267.712,177.316,620.606
+
+depth 24 (32bpp) little-endian rgb888,depth 16 (16bpp) little-endian rgb565,999.024,231.412,257.61,344.202,265.974
+depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) little-endian rgb565,975.238,455.111,101.638,165.161,267.712
+depth 7 (8bpp) rgb232,depth 16 (16bpp) little-endian rgb565,1050.26,576.901,105.296,181.239,269.474
+
+depth 24 (32bpp) little-endian rgb888,depth 7 (8bpp) rgb232,1638.4,259.24,271.258,347.119,298.978
+depth 16 (16bpp) little-endian rgb565,depth 7 (8bpp) rgb232,1575.38,505.679,105.026,165.161,294.676
+depth 7 (8bpp) bgr232,depth 7 (8bpp) rgb232,1706.67,602.353,107.225,183.677,298.978
+
+depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) big-endian bgr888,593.623,251.288,286.434,338.512,620.606
+depth 24 (32bpp) big-endian bgr888,depth 24 (32bpp) little-endian bgr888,593.623,NaN,282.483,344.202,611.343
+depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) big-endian bgr565,1050.26,450.11,97.7566,166.504,259.24
+depth 16 (16bpp) big-endian bgr565,depth 16 (16bpp) little-endian bgr565,999.024,NaN,96.6038,155.152,267.712
diff --git a/tests/perf/util.cxx b/tests/perf/util.cxx
new file mode 100644
index 00000000..17a83698
--- /dev/null
+++ b/tests/perf/util.cxx
@@ -0,0 +1,178 @@
+/* Copyright 2013-2014 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <sys/resource.h>
+#include <sys/time.h>
+#endif
+
+#include "util.h"
+
+#ifdef WIN32
+typedef struct {
+ FILETIME kernelTime;
+ FILETIME userTime;
+} syscounter_t;
+#else
+typedef struct rusage syscounter_t;
+#endif
+
+// Backing storage for the implicit, file-wide CPU counter. Slot 0
+// receives the snapshot taken at start, slot 1 the one taken at end.
+static syscounter_t globalCounterSlots[2];
+static cpucounter_t globalCounter = globalCounterSlots;
+
+// The argument-less variants below just forward to their cpucounter_t
+// overloads, passing the global counter.
+
+void startCpuCounter()
+{
+  startCpuCounter(globalCounter);
+}
+
+void endCpuCounter()
+{
+  endCpuCounter(globalCounter);
+}
+
+double getCpuCounter()
+{
+  return getCpuCounter(globalCounter);
+}
+
+// Allocates a zero-initialised counter consisting of two snapshots
+// (start and end). Returns NULL if the allocation fails.
+cpucounter_t newCpuCounter()
+{
+  // calloc() hands back zero-filled memory, or NULL on failure, which
+  // is exactly what we want to give the caller in both cases
+  return calloc(2, sizeof(syscounter_t));
+}
+
+// Releases the memory of a counter handed out by newCpuCounter().
+void freeCpuCounter(cpucounter_t c)
+{
+  syscounter_t* const slots = static_cast<syscounter_t*>(c);
+
+  free(slots);
+}
+
+// Stores the current CPU usage of this process in *counter.
+static void measureCpu(syscounter_t *counter)
+{
+#ifndef WIN32
+  getrusage(RUSAGE_SELF, counter);
+#else
+  // Only the kernel and user times are kept; the creation and exit
+  // times are fetched because the API wants somewhere to put them
+  FILETIME creationTime, exitTime;
+
+  GetProcessTimes(GetCurrentProcess(), &creationTime, &exitTime,
+                  &counter->kernelTime, &counter->userTime);
+#endif
+}
+
+// Takes the "start" snapshot of the given counter.
+void startCpuCounter(cpucounter_t c)
+{
+  syscounter_t* const slots = static_cast<syscounter_t*>(c);
+
+  // Slot 0 holds the start snapshot
+  measureCpu(slots);
+}
+
+// Takes the "end" snapshot of the given counter.
+void endCpuCounter(cpucounter_t c)
+{
+  syscounter_t* const slots = static_cast<syscounter_t*>(c);
+
+  // Slot 1 holds the end snapshot
+  measureCpu(slots + 1);
+}
+
+#ifdef WIN32
+// Merges the two 32 bit halves of a FILETIME into one 64 bit count.
+static uint64_t filetimeTicks(const FILETIME& ft)
+{
+  return ((uint64_t)ft.dwHighDateTime << 32) | ft.dwLowDateTime;
+}
+#else
+// Difference between two timevals ("to" minus "from"), in seconds.
+static double timevalDelta(const struct timeval& from,
+                           const struct timeval& to)
+{
+  double seconds;
+
+  seconds = (double)(to.tv_sec - from.tv_sec);
+  seconds += (double)(to.tv_usec - from.tv_usec) / 1000000.0;
+
+  return seconds;
+}
+#endif
+
+// Returns the CPU time, in seconds, spent between the start and end
+// snapshots of the given counter. Kernel (system) time and user time
+// are computed separately and then added together.
+double getCpuCounter(cpucounter_t c)
+{
+  const syscounter_t* snap = (const syscounter_t*)c;
+  double kernelSecs, userSecs;
+
+#ifdef WIN32
+  // The raw counts are divided by 10000000 to turn them into seconds
+  kernelSecs = (double)(filetimeTicks(snap[1].kernelTime) -
+                        filetimeTicks(snap[0].kernelTime)) / 10000000.0;
+  userSecs = (double)(filetimeTicks(snap[1].userTime) -
+                      filetimeTicks(snap[0].userTime)) / 10000000.0;
+#else
+  kernelSecs = timevalDelta(snap[0].ru_stime, snap[1].ru_stime);
+  userSecs = timevalDelta(snap[0].ru_utime, snap[1].ru_utime);
+#endif
+
+  return kernelSecs + userSecs;
+}
+
+// Start and end stamps shared by the *TimeCounter() functions. The
+// type depends on which clock API the platform branch uses.
+#ifndef WIN32
+static struct timeval timeStart, timeEnd;
+#else
+static LARGE_INTEGER timeStart, timeEnd;
+#endif
+
+// Records the current time as the start of the measured interval.
+void startTimeCounter()
+{
+#ifndef WIN32
+  gettimeofday(&timeStart, NULL);
+#else
+  QueryPerformanceCounter(&timeStart);
+#endif
+}
+
+// Records the current time as the end of the measured interval.
+void endTimeCounter()
+{
+#ifndef WIN32
+  gettimeofday(&timeEnd, NULL);
+#else
+  QueryPerformanceCounter(&timeEnd);
+#endif
+}
+
+// Returns the time, in seconds, between the stamps recorded by
+// startTimeCounter() and endTimeCounter().
+double getTimeCounter()
+{
+#ifdef WIN32
+  LARGE_INTEGER ticksPerSecond;
+
+  QueryPerformanceFrequency(&ticksPerSecond);
+
+  // Counter difference first, then scale by the counter frequency
+  double elapsedTicks = timeEnd.QuadPart - timeStart.QuadPart;
+
+  return elapsedTicks / ticksPerSecond.QuadPart;
+#else
+  double wholeSeconds = (double)timeEnd.tv_sec - timeStart.tv_sec;
+  double microPart = (double)(timeEnd.tv_usec - timeStart.tv_usec) / 1000000.0;
+
+  return wholeSeconds + microPart;
+#endif
+}
diff --git a/tests/perf/util.h b/tests/perf/util.h
new file mode 100644
index 00000000..2b8ab4a8
--- /dev/null
+++ b/tests/perf/util.h
@@ -0,0 +1,42 @@
+/* Copyright 2013-2014 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#ifndef __TESTS_UTIL_H__
+#define __TESTS_UTIL_H__
+
+// Opaque handle to a CPU usage counter
+typedef void* cpucounter_t;
+
+// Implicit global counter: take the start and end snapshots, then read
+// the CPU time (system + user) in seconds between them.
+void startCpuCounter();
+void endCpuCounter();
+
+double getCpuCounter();
+
+// Explicit counters. newCpuCounter() returns NULL if memory cannot be
+// allocated; release the counter with freeCpuCounter() when done.
+cpucounter_t newCpuCounter();
+void freeCpuCounter(cpucounter_t c);
+
+// Same operations as the global variants above, applied to counter c.
+void startCpuCounter(cpucounter_t c);
+void endCpuCounter(cpucounter_t c);
+
+double getCpuCounter(cpucounter_t c);
+
+// Elapsed time: record a start and an end stamp, then read the number
+// of seconds between them.
+void startTimeCounter();
+void endTimeCounter();
+
+double getTimeCounter();
+
+#endif