diff options
author | Pierre Ossman <ossman@cendio.se> | 2019-09-09 12:48:25 +0200 |
---|---|---|
committer | Pierre Ossman <ossman@cendio.se> | 2019-09-25 15:50:07 +0200 |
commit | 94de4dd0c46483477706002e128a61f74278c0f6 (patch) | |
tree | 7e51c88049452f0a46faa54177ffd96dc2903efb /tests/perf | |
parent | d06906db59a6aa6434a5dd602b8d3c987d291ca8 (diff) | |
download | tigervnc-94de4dd0c46483477706002e128a61f74278c0f6.tar.gz tigervnc-94de4dd0c46483477706002e128a61f74278c0f6.zip |
Split test programs to benchmarking and unit tests
They have very different purposes, so make things easier to work
with by having multiple directories.
Diffstat (limited to 'tests/perf')
-rw-r--r-- | tests/perf/CMakeLists.txt | 40 | ||||
-rw-r--r-- | tests/perf/convperf.cxx | 224 | ||||
-rw-r--r-- | tests/perf/decperf.cxx | 240 | ||||
-rw-r--r-- | tests/perf/encperf.cxx | 506 | ||||
-rw-r--r-- | tests/perf/fbperf.cxx | 399 | ||||
-rw-r--r-- | tests/perf/results/multicore/README | 40 | ||||
-rw-r--r-- | tests/perf/results/multicore/multicore.ods | bin | 0 -> 42291 bytes | |||
-rw-r--r-- | tests/perf/results/notrans/README | 28 | ||||
-rw-r--r-- | tests/perf/results/notrans/armhf.csv | 26 | ||||
-rw-r--r-- | tests/perf/results/notrans/i386.csv | 26 | ||||
-rw-r--r-- | tests/perf/results/notrans/x86_64.csv | 26 | ||||
-rw-r--r-- | tests/perf/util.cxx | 178 | ||||
-rw-r--r-- | tests/perf/util.h | 42 |
13 files changed, 1775 insertions, 0 deletions
diff --git a/tests/perf/CMakeLists.txt b/tests/perf/CMakeLists.txt new file mode 100644 index 00000000..053bfaae --- /dev/null +++ b/tests/perf/CMakeLists.txt @@ -0,0 +1,40 @@ +include_directories(${FLTK_INCLUDE_DIR}) +include_directories(${GETTEXT_INCLUDE_DIR}) + +include_directories(${CMAKE_SOURCE_DIR}/common) + +add_library(test_util STATIC util.cxx) + +add_executable(convperf convperf.cxx) +target_link_libraries(convperf test_util rfb) + +add_executable(decperf decperf.cxx) +target_link_libraries(decperf test_util rfb) + +add_executable(encperf encperf.cxx) +target_link_libraries(encperf test_util rfb) + +set(FBPERF_SOURCES + fbperf.cxx + ${CMAKE_SOURCE_DIR}/vncviewer/PlatformPixelBuffer.cxx + ${CMAKE_SOURCE_DIR}/vncviewer/Surface.cxx) +if(WIN32) + set(FBPERF_SOURCES ${FBPERF_SOURCES} ${CMAKE_SOURCE_DIR}/vncviewer/Surface_Win32.cxx) +elseif(APPLE) + set(FBPERF_SOURCES + ${FBPERF_SOURCES} ${CMAKE_SOURCE_DIR}/vncviewer/Surface_OSX.cxx + ${FBPERF_SOURCES} ${CMAKE_SOURCE_DIR}/vncviewer/keysym2ucs.c + ${FBPERF_SOURCES} ${CMAKE_SOURCE_DIR}/vncviewer/cocoa.mm) +else() + set(FBPERF_SOURCES ${FBPERF_SOURCES} ${CMAKE_SOURCE_DIR}/vncviewer/Surface_X11.cxx) +endif() +add_executable(fbperf ${FBPERF_SOURCES}) +target_link_libraries(fbperf test_util rfb ${FLTK_LIBRARIES} ${GETTEXT_LIBRARIES}) +if(WIN32) + target_link_libraries(fbperf msimg32) +endif() +if(APPLE) + target_link_libraries(fbperf "-framework Cocoa") + target_link_libraries(fbperf "-framework Carbon") + target_link_libraries(fbperf "-framework IOKit") +endif() diff --git a/tests/perf/convperf.cxx b/tests/perf/convperf.cxx new file mode 100644 index 00000000..e4a3fd52 --- /dev/null +++ b/tests/perf/convperf.cxx @@ -0,0 +1,224 @@ +/* Copyright 2013-2014 Pierre Ossman <ossman@cendio.se> for Cendio AB + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at 
your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include <rfb/PixelFormat.h> + +#include "util.h" + +static const int tile = 64; +static const int fbsize = 4096; + +static rdr::U8 *fb1, *fb2; + +typedef void (*testfn) (rfb::PixelFormat&, rfb::PixelFormat&, rdr::U8*, rdr::U8*); + +struct TestEntry { + const char *label; + testfn fn; +}; + +static void testMemcpy(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf, + rdr::U8 *dst, rdr::U8 *src) +{ + int h; + h = tile; + while (h--) { + memcpy(dst, src, tile * dstpf.bpp/8); + dst += fbsize * dstpf.bpp/8; + src += fbsize * dstpf.bpp/8; + } +} + +static void testBuffer(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf, + rdr::U8 *dst, rdr::U8 *src) +{ + dstpf.bufferFromBuffer(dst, srcpf, src, tile, tile, fbsize, fbsize); +} + +static void testToRGB(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf, + rdr::U8 *dst, rdr::U8 *src) +{ + srcpf.rgbFromBuffer(dst, src, tile, fbsize, tile); +} + +static void testFromRGB(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf, + rdr::U8 *dst, rdr::U8 *src) +{ + dstpf.bufferFromRGB(dst, src, tile, fbsize, tile); +} + +static void doTest(testfn fn, rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf) +{ + startCpuCounter(); + + for (int i = 0;i < 10000;i++) { + int x, y; + rdr::U8 *dst, *src; + x = rand() % (fbsize - tile); + y = rand() % (fbsize - tile); + dst = fb1 + (x + y * fbsize) * dstpf.bpp/8; + src = fb2 + (x + y * fbsize) * srcpf.bpp/8; + 
fn(dstpf, srcpf, dst, src); + } + + endCpuCounter(); + + float data, time; + + data = (double)tile * tile * 10000; + time = getCpuCounter(); + + printf("%g", data / (1000.0*1000.0) / time); +} + +struct TestEntry tests[] = { + {"memcpy", testMemcpy}, + {"bufferFromBuffer", testBuffer}, + {"rgbFromBuffer", testToRGB}, + {"bufferFromRGB", testFromRGB}, +}; + +static void doTests(rfb::PixelFormat &dstpf, rfb::PixelFormat &srcpf) +{ + size_t i; + char dstb[256], srcb[256]; + + dstpf.print(dstb, sizeof(dstb)); + srcpf.print(srcb, sizeof(srcb)); + + printf("%s,%s", srcb, dstb); + + for (i = 0;i < sizeof(tests)/sizeof(tests[0]);i++) { + printf(","); + doTest(tests[i].fn, dstpf, srcpf); + } + + printf("\n"); +} + +int main(int argc, char **argv) +{ + size_t bufsize; + + time_t t; + char datebuffer[256]; + + size_t i; + + bufsize = fbsize * fbsize * 4; + + fb1 = new rdr::U8[bufsize]; + fb2 = new rdr::U8[bufsize]; + + for (i = 0;i < bufsize;i++) { + fb1[i] = rand(); + fb2[i] = rand(); + } + + time(&t); + strftime(datebuffer, sizeof(datebuffer), "%Y-%m-%d %H:%M UTC", gmtime(&t)); + + printf("# Pixel Conversion Performance Test %s\n", datebuffer); + printf("#\n"); + printf("# Frame buffer: %dx%d pixels\n", fbsize, fbsize); + printf("# Tile size: %dx%d pixels\n", tile, tile); + printf("#\n"); + printf("# Note: Results are Mpixels/sec\n"); + printf("#\n"); + + printf("Source format,Destination Format"); + for (i = 0;i < sizeof(tests)/sizeof(tests[0]);i++) + printf(",%s", tests[i].label); + printf("\n"); + + rfb::PixelFormat dstpf, srcpf; + + /* rgb888 targets */ + + printf("\n"); + + dstpf.parse("rgb888"); + + srcpf.parse("rgb888"); + doTests(dstpf, srcpf); + + srcpf.parse("bgr888"); + doTests(dstpf, srcpf); + + srcpf.parse("rgb565"); + doTests(dstpf, srcpf); + + srcpf.parse("rgb232"); + doTests(dstpf, srcpf); + + /* rgb565 targets */ + + printf("\n"); + + dstpf.parse("rgb565"); + + srcpf.parse("rgb888"); + doTests(dstpf, srcpf); + + srcpf.parse("bgr565"); + doTests(dstpf, 
srcpf); + + srcpf.parse("rgb232"); + doTests(dstpf, srcpf); + + /* rgb232 targets */ + + printf("\n"); + + dstpf.parse("rgb232"); + + srcpf.parse("rgb888"); + doTests(dstpf, srcpf); + + srcpf.parse("rgb565"); + doTests(dstpf, srcpf); + + srcpf.parse("bgr232"); + doTests(dstpf, srcpf); + + /* rgb565 with endian conversion (both ways) */ + + printf("\n"); + + dstpf = rfb::PixelFormat(32, 24, false, true, 255, 255, 255, 0, 8, 16); + srcpf = rfb::PixelFormat(32, 24, true, true, 255, 255, 255, 0, 8, 16); + + doTests(srcpf, dstpf); + + doTests(dstpf, srcpf); + + dstpf = rfb::PixelFormat(16, 16, false, true, 31, 63, 31, 0, 5, 11); + srcpf = rfb::PixelFormat(16, 16, true, true, 31, 63, 31, 0, 5, 11); + + doTests(srcpf, dstpf); + + doTests(dstpf, srcpf); + + return 0; +} + diff --git a/tests/perf/decperf.cxx b/tests/perf/decperf.cxx new file mode 100644 index 00000000..df5214f2 --- /dev/null +++ b/tests/perf/decperf.cxx @@ -0,0 +1,240 @@ +/* Copyright 2015 Pierre Ossman <ossman@cendio.se> for Cendio AB + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + */ + +/* + * This program reads files produced by TightVNC's/TurboVNC's + * compare-encodings. It is basically a dump of the RFB protocol + * from the server side from the ServerInit message and forward. 
+ * It is assumed that the client is using a bgr888 (LE) pixel + * format. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <sys/time.h> + +#include <rdr/Exception.h> +#include <rdr/FileInStream.h> + +#include <rfb/CConnection.h> +#include <rfb/CMsgReader.h> +#include <rfb/PixelBuffer.h> +#include <rfb/PixelFormat.h> + +#include "util.h" + +// FIXME: Files are always in this format +static const rfb::PixelFormat filePF(32, 24, false, true, 255, 255, 255, 0, 8, 16); + +class CConn : public rfb::CConnection { +public: + CConn(const char *filename); + ~CConn(); + + virtual void initDone(); + virtual void setPixelFormat(const rfb::PixelFormat& pf); + virtual void setCursor(int, int, const rfb::Point&, const rdr::U8*); + virtual void framebufferUpdateStart(); + virtual void framebufferUpdateEnd(); + virtual void setColourMapEntries(int, int, rdr::U16*); + virtual void bell(); + virtual void serverCutText(const char*); + +public: + double cpuTime; + +protected: + rdr::FileInStream *in; +}; + +CConn::CConn(const char *filename) +{ + cpuTime = 0.0; + + in = new rdr::FileInStream(filename); + setStreams(in, NULL); + + // Need to skip the initial handshake + setState(RFBSTATE_INITIALISATION); + // That also means that the reader and writer weren't setup + setReader(new rfb::CMsgReader(this, in)); +} + +CConn::~CConn() +{ + delete in; +} + +void CConn::initDone() +{ + setFramebuffer(new rfb::ManagedPixelBuffer(filePF, + server.width(), + server.height())); +} + +void CConn::setPixelFormat(const rfb::PixelFormat& pf) +{ + // Override format + CConnection::setPixelFormat(filePF); +} + +void CConn::setCursor(int, int, const rfb::Point&, const rdr::U8*) +{ +} + +void CConn::framebufferUpdateStart() +{ + CConnection::framebufferUpdateStart(); + + startCpuCounter(); +} + +void CConn::framebufferUpdateEnd() +{ + CConnection::framebufferUpdateEnd(); + + endCpuCounter(); + + cpuTime += getCpuCounter(); +} + +void CConn::setColourMapEntries(int, int, 
rdr::U16*) +{ +} + +void CConn::bell() +{ +} + +void CConn::serverCutText(const char*) +{ +} + +struct stats +{ + double decodeTime; + double realTime; +}; + +static struct stats runTest(const char *fn) +{ + CConn *cc; + struct timeval start, stop; + struct stats s; + + gettimeofday(&start, NULL); + + try { + cc = new CConn(fn); + } catch (rdr::Exception& e) { + fprintf(stderr, "Failed to open rfb file: %s\n", e.str()); + exit(1); + } + + try { + while (true) + cc->processMsg(); + } catch (rdr::EndOfStream& e) { + } catch (rdr::Exception& e) { + fprintf(stderr, "Failed to run rfb file: %s\n", e.str()); + exit(1); + } + + gettimeofday(&stop, NULL); + + s.decodeTime = cc->cpuTime; + s.realTime = (double)stop.tv_sec - start.tv_sec; + s.realTime += ((double)stop.tv_usec - start.tv_usec)/1000000.0; + + delete cc; + + return s; +} + +static void sort(double *array, int count) +{ + bool sorted; + int i; + do { + sorted = true; + for (i = 1;i < count;i++) { + if (array[i-1] > array[i]) { + double d; + d = array[i]; + array[i] = array[i-1]; + array[i-1] = d; + sorted = false; + } + } + } while (!sorted); +} + +static const int runCount = 9; + +int main(int argc, char **argv) +{ + int i; + struct stats runs[runCount]; + double values[runCount], dev[runCount]; + double median, meddev; + + if (argc != 2) { + printf("Syntax: %s <rfb file>\n", argv[0]); + return 1; + } + + // Warmup + runTest(argv[1]); + + // Multiple runs to get a good average + for (i = 0;i < runCount;i++) + runs[i] = runTest(argv[1]); + + // Calculate median and median deviation for CPU usage + for (i = 0;i < runCount;i++) + values[i] = runs[i].decodeTime; + + sort(values, runCount); + median = values[runCount/2]; + + for (i = 0;i < runCount;i++) + dev[i] = fabs((values[i] - median) / median) * 100; + + sort(dev, runCount); + meddev = dev[runCount/2]; + + printf("CPU time: %g s (+/- %g %%)\n", median, meddev); + + // And for CPU core usage + for (i = 0;i < runCount;i++) + values[i] = runs[i].decodeTime / 
runs[i].realTime; + + sort(values, runCount); + median = values[runCount/2]; + + for (i = 0;i < runCount;i++) + dev[i] = fabs((values[i] - median) / median) * 100; + + sort(dev, runCount); + meddev = dev[runCount/2]; + + printf("Core usage: %g (+/- %g %%)\n", median, meddev); + + return 0; +} diff --git a/tests/perf/encperf.cxx b/tests/perf/encperf.cxx new file mode 100644 index 00000000..e461197e --- /dev/null +++ b/tests/perf/encperf.cxx @@ -0,0 +1,506 @@ +/* Copyright 2015 Pierre Ossman <ossman@cendio.se> for Cendio AB + * Copyright (C) 2015 D. R. Commander. All Rights Reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + */ + +/* + * This program reads files produced by TightVNC's/TurboVNC's + * fbs-dump, which in turn takes files from rfbproxy. It is + * basically a dump of the RFB protocol from the server side after + * the ServerInit message. Mostly this consists of FramebufferUpdate + * message using the HexTile encoding. Screen size and pixel format + * are not encoded in the file and must be specified by the user. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <sys/time.h> + +#include <rdr/Exception.h> +#include <rdr/OutStream.h> +#include <rdr/FileInStream.h> + +#include <rfb/PixelFormat.h> + +#include <rfb/CConnection.h> +#include <rfb/CMsgReader.h> +#include <rfb/UpdateTracker.h> + +#include <rfb/EncodeManager.h> +#include <rfb/SConnection.h> +#include <rfb/SMsgWriter.h> + +#include "util.h" + +static rfb::IntParameter width("width", "Frame buffer width", 0); +static rfb::IntParameter height("height", "Frame buffer height", 0); +static rfb::IntParameter count("count", "Number of benchmark iterations", 9); + +static rfb::StringParameter format("format", "Pixel format (e.g. bgr888)", ""); + +static rfb::BoolParameter translate("translate", + "Translate 8-bit and 16-bit datasets into 24-bit", + true); + +// The frame buffer (and output) is always this format +static const rfb::PixelFormat fbPF(32, 24, false, true, 255, 255, 255, 0, 8, 16); + +// Encodings to use +static const rdr::S32 encodings[] = { + rfb::encodingTight, rfb::encodingCopyRect, rfb::encodingRRE, + rfb::encodingHextile, rfb::encodingZRLE, rfb::pseudoEncodingLastRect, + rfb::pseudoEncodingQualityLevel0 + 8, + rfb::pseudoEncodingCompressLevel0 + 2}; + +class DummyOutStream : public rdr::OutStream { +public: + DummyOutStream(); + + virtual int length(); + virtual void flush(); + +private: + virtual int overrun(int itemSize, int nItems); + + int offset; + rdr::U8 buf[131072]; +}; + +class CConn : public rfb::CConnection { +public: + CConn(const char *filename); + ~CConn(); + + void getStats(double& ratio, unsigned long long& bytes, + unsigned long long& rawEquivalent); + + virtual void initDone(); + virtual void setCursor(int, int, const rfb::Point&, const rdr::U8*); + virtual void framebufferUpdateStart(); + virtual void framebufferUpdateEnd(); + virtual void dataRect(const rfb::Rect&, int); + virtual void setColourMapEntries(int, int, rdr::U16*); + virtual void bell(); + virtual 
void serverCutText(const char*); + +public: + double decodeTime; + double encodeTime; + +protected: + rdr::FileInStream *in; + rfb::SimpleUpdateTracker updates; + class SConn *sc; +}; + +class Manager : public rfb::EncodeManager { +public: + Manager(class rfb::SConnection *conn); + + void getStats(double&, unsigned long long&, unsigned long long&); +}; + +class SConn : public rfb::SConnection { +public: + SConn(); + ~SConn(); + + void writeUpdate(const rfb::UpdateInfo& ui, const rfb::PixelBuffer* pb); + + void getStats(double&, unsigned long long&, unsigned long long&); + + virtual void setAccessRights(AccessRights ar); + + virtual void setDesktopSize(int fb_width, int fb_height, + const rfb::ScreenSet& layout); + +protected: + DummyOutStream *out; + Manager *manager; +}; + +DummyOutStream::DummyOutStream() +{ + offset = 0; + ptr = buf; + end = buf + sizeof(buf); +} + +int DummyOutStream::length() +{ + flush(); + return offset; +} + +void DummyOutStream::flush() +{ + offset += ptr - buf; + ptr = buf; +} + +int DummyOutStream::overrun(int itemSize, int nItems) +{ + flush(); + if (itemSize * nItems > end - ptr) + nItems = (end - ptr) / itemSize; + return nItems; +} + +CConn::CConn(const char *filename) +{ + decodeTime = 0.0; + encodeTime = 0.0; + + in = new rdr::FileInStream(filename); + setStreams(in, NULL); + + // Need to skip the initial handshake and ServerInit + setState(RFBSTATE_NORMAL); + // That also means that the reader and writer weren't setup + setReader(new rfb::CMsgReader(this, in)); + // Nor the frame buffer size and format + rfb::PixelFormat pf; + pf.parse(format); + setPixelFormat(pf); + setDesktopSize(width, height); + + sc = new SConn(); + sc->client.setPF((bool)translate ? 
fbPF : pf); + sc->setEncodings(sizeof(encodings) / sizeof(*encodings), encodings); +} + +CConn::~CConn() +{ + delete sc; + delete in; +} + +void CConn::getStats(double& ratio, unsigned long long& bytes, + unsigned long long& rawEquivalent) +{ + sc->getStats(ratio, bytes, rawEquivalent); +} + +void CConn::initDone() +{ + rfb::ModifiablePixelBuffer *pb; + + pb = new rfb::ManagedPixelBuffer((bool)translate ? fbPF : server.pf(), + server.width(), server.height()); + setFramebuffer(pb); +} + +void CConn::setCursor(int, int, const rfb::Point&, const rdr::U8*) +{ +} + +void CConn::framebufferUpdateStart() +{ + CConnection::framebufferUpdateStart(); + + updates.clear(); + startCpuCounter(); +} + +void CConn::framebufferUpdateEnd() +{ + rfb::UpdateInfo ui; + rfb::PixelBuffer* pb = getFramebuffer(); + rfb::Region clip(pb->getRect()); + + CConnection::framebufferUpdateEnd(); + + endCpuCounter(); + + decodeTime += getCpuCounter(); + + updates.getUpdateInfo(&ui, clip); + + startCpuCounter(); + sc->writeUpdate(ui, pb); + endCpuCounter(); + + encodeTime += getCpuCounter(); +} + +void CConn::dataRect(const rfb::Rect &r, int encoding) +{ + CConnection::dataRect(r, encoding); + + if (encoding != rfb::encodingCopyRect) // FIXME + updates.add_changed(rfb::Region(r)); +} + +void CConn::setColourMapEntries(int, int, rdr::U16*) +{ +} + +void CConn::bell() +{ +} + +void CConn::serverCutText(const char*) +{ +} + +Manager::Manager(class rfb::SConnection *conn) : + EncodeManager(conn) +{ +} + +void Manager::getStats(double& ratio, unsigned long long& encodedBytes, + unsigned long long& rawEquivalent) +{ + StatsVector::iterator iter; + unsigned long long bytes, equivalent; + + bytes = equivalent = 0; + for (iter = stats.begin(); iter != stats.end(); ++iter) { + StatsVector::value_type::iterator iter2; + for (iter2 = iter->begin(); iter2 != iter->end(); ++iter2) { + bytes += iter2->bytes; + equivalent += iter2->equivalent; + } + } + + ratio = (double)equivalent / bytes; + encodedBytes = bytes; 
+ rawEquivalent = equivalent; +} + +SConn::SConn() +{ + out = new DummyOutStream; + setStreams(NULL, out); + + setWriter(new rfb::SMsgWriter(&client, out)); + + manager = new Manager(this); +} + +SConn::~SConn() +{ + delete manager; + delete out; +} + +void SConn::writeUpdate(const rfb::UpdateInfo& ui, const rfb::PixelBuffer* pb) +{ + manager->writeUpdate(ui, pb, NULL); +} + +void SConn::getStats(double& ratio, unsigned long long& bytes, + unsigned long long& rawEquivalent) +{ + manager->getStats(ratio, bytes, rawEquivalent); +} + +void SConn::setAccessRights(AccessRights ar) +{ +} + +void SConn::setDesktopSize(int fb_width, int fb_height, + const rfb::ScreenSet& layout) +{ +} + +struct stats +{ + double decodeTime; + double encodeTime; + double realTime; + + double ratio; + unsigned long long bytes; + unsigned long long rawEquivalent; +}; + +static struct stats runTest(const char *fn) +{ + CConn *cc; + struct stats s; + struct timeval start, stop; + + gettimeofday(&start, NULL); + + try { + cc = new CConn(fn); + } catch (rdr::Exception& e) { + fprintf(stderr, "Failed to open rfb file: %s\n", e.str()); + exit(1); + } + + try { + while (true) + cc->processMsg(); + } catch (rdr::EndOfStream& e) { + } catch (rdr::Exception& e) { + fprintf(stderr, "Failed to run rfb file: %s\n", e.str()); + exit(1); + } + + gettimeofday(&stop, NULL); + + s.decodeTime = cc->decodeTime; + s.encodeTime = cc->encodeTime; + s.realTime = (double)stop.tv_sec - start.tv_sec; + s.realTime += ((double)stop.tv_usec - start.tv_usec)/1000000.0; + cc->getStats(s.ratio, s.bytes, s.rawEquivalent); + + delete cc; + + return s; +} + +static void sort(double *array, int count) +{ + bool sorted; + int i; + do { + sorted = true; + for (i = 1; i < count; i++) { + if (array[i-1] > array[i]) { + double d; + d = array[i]; + array[i] = array[i - 1]; + array[i - 1] = d; + sorted = false; + } + } + } while (!sorted); +} + +static void usage(const char *argv0) +{ + fprintf(stderr, "Syntax: %s [options] <rfb 
file>\n", argv0); + fprintf(stderr, "Options:\n"); + rfb::Configuration::listParams(79, 14); + exit(1); +} + +int main(int argc, char **argv) +{ + int i; + + const char *fn; + + fn = NULL; + for (i = 1; i < argc; i++) { + if (rfb::Configuration::setParam(argv[i])) + continue; + + if (argv[i][0] == '-') { + if (i + 1 < argc) { + if (rfb::Configuration::setParam(&argv[i][1], argv[i + 1])) { + i++; + continue; + } + } + usage(argv[0]); + } + + if (fn != NULL) + usage(argv[0]); + + fn = argv[i]; + } + + int runCount = count; + struct stats *runs = new struct stats[runCount]; + double *values = new double[runCount]; + double *dev = new double[runCount]; + double median, meddev; + + if (fn == NULL) { + fprintf(stderr, "No file specified!\n\n"); + usage(argv[0]); + } + + if (strcmp(format, "") == 0) { + fprintf(stderr, "Pixel format not specified!\n\n"); + usage(argv[0]); + } + + if (width == 0 || height == 0) { + fprintf(stderr, "Frame buffer size not specified!\n\n"); + usage(argv[0]); + } + + // Warmup + runTest(fn); + + // Multiple runs to get a good average + for (i = 0; i < runCount; i++) + runs[i] = runTest(fn); + + // Calculate median and median deviation for CPU usage decoding + for (i = 0;i < runCount;i++) + values[i] = runs[i].decodeTime; + + sort(values, runCount); + median = values[runCount/2]; + + for (i = 0;i < runCount;i++) + dev[i] = fabs((values[i] - median) / median) * 100; + + sort(dev, runCount); + meddev = dev[runCount/2]; + + printf("CPU time (decoding): %g s (+/- %g %%)\n", median, meddev); + + // And for CPU usage encoding + for (i = 0;i < runCount;i++) + values[i] = runs[i].encodeTime; + + sort(values, runCount); + median = values[runCount/2]; + + for (i = 0;i < runCount;i++) + dev[i] = fabs((values[i] - median) / median) * 100; + + sort(dev, runCount); + meddev = dev[runCount/2]; + + printf("CPU time (encoding): %g s (+/- %g %%)\n", median, meddev); + + // And for CPU core usage encoding + for (i = 0;i < runCount;i++) + values[i] = 
(runs[i].decodeTime + runs[i].encodeTime) / runs[i].realTime; + + sort(values, runCount); + median = values[runCount/2]; + + for (i = 0;i < runCount;i++) + dev[i] = fabs((values[i] - median) / median) * 100; + + sort(dev, runCount); + meddev = dev[runCount/2]; + + printf("Core usage (total): %g (+/- %g %%)\n", median, meddev); + +#ifdef WIN32 + printf("Encoded bytes: %I64d\n", runs[0].bytes); + printf("Raw equivalent bytes: %I64d\n", runs[0].rawEquivalent); +#else + printf("Encoded bytes: %lld\n", runs[0].bytes); + printf("Raw equivalent bytes: %lld\n", runs[0].rawEquivalent); +#endif + printf("Ratio: %g\n", runs[0].ratio); + + return 0; +} diff --git a/tests/perf/fbperf.cxx b/tests/perf/fbperf.cxx new file mode 100644 index 00000000..a19ee479 --- /dev/null +++ b/tests/perf/fbperf.cxx @@ -0,0 +1,399 @@ +/* Copyright 2016 Pierre Ossman <ossman@cendio.se> for Cendio AB + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. 
+ */ + +#include <math.h> +#include <sys/time.h> + +#include <FL/Fl.H> +#include <FL/Fl_Window.H> +#include <FL/fl_draw.H> + +#include <rdr/Exception.h> +#include <rfb/util.h> + +#include "../vncviewer/PlatformPixelBuffer.h" + +#include "util.h" + +class TestWindow: public Fl_Window { +public: + TestWindow(); + ~TestWindow(); + + virtual void start(int width, int height); + virtual void stop(); + + virtual void draw(); + +protected: + virtual void flush(); + + void update(); + virtual void changefb(); + + static void timer(void* data); + +public: + unsigned long long pixels, frames; + double time; + +protected: + PlatformPixelBuffer* fb; +}; + +class PartialTestWindow: public TestWindow { +protected: + virtual void changefb(); +}; + +class OverlayTestWindow: public PartialTestWindow { +public: + OverlayTestWindow(); + + virtual void start(int width, int height); + virtual void stop(); + + virtual void draw(); + +protected: + Surface* overlay; + Surface* offscreen; +}; + +TestWindow::TestWindow() : + Fl_Window(0, 0, "Framebuffer Performance Test"), + fb(NULL) +{ +} + +TestWindow::~TestWindow() +{ + stop(); +} + +void TestWindow::start(int width, int height) +{ + rdr::U32 pixel; + + stop(); + + resize(x(), y(), width, height); + + pixels = 0; + frames = 0; + time = 0; + + fb = new PlatformPixelBuffer(w(), h()); + + pixel = 0; + fb->fillRect(fb->getRect(), &pixel); + + show(); +} + +void TestWindow::stop() +{ + hide(); + + delete fb; + fb = NULL; + + Fl::remove_idle(timer, this); +} + +void TestWindow::draw() +{ + int X, Y, W, H; + + // We cannot update the damage region from inside the draw function, + // so delegate this to an idle function + Fl::add_idle(timer, this); + + // Check what actually needs updating + fl_clip_box(0, 0, w(), h(), X, Y, W, H); + if ((W == 0) || (H == 0)) + return; + + fb->draw(X, Y, X, Y, W, H); + + pixels += W*H; + frames++; +} + +void TestWindow::flush() +{ + startTimeCounter(); + Fl_Window::flush(); +#if !defined(WIN32) && 
!defined(__APPLE__) + // Make sure we measure any work we queue up + XSync(fl_display, False); +#endif + endTimeCounter(); + + time += getTimeCounter(); +} + +void TestWindow::update() +{ + rfb::Rect r; + + startTimeCounter(); + + changefb(); + + r = fb->getDamage(); + damage(FL_DAMAGE_USER1, r.tl.x, r.tl.y, r.width(), r.height()); + +#if !defined(WIN32) && !defined(__APPLE__) + // Make sure we measure any work we queue up + XSync(fl_display, False); +#endif + + endTimeCounter(); + + time += getTimeCounter(); +} + +void TestWindow::changefb() +{ + rdr::U32 pixel; + + pixel = rand(); + fb->fillRect(fb->getRect(), &pixel); +} + +void TestWindow::timer(void* data) +{ + TestWindow* self; + + Fl::remove_idle(timer, data); + + self = (TestWindow*)data; + self->update(); +} + +void PartialTestWindow::changefb() +{ + rfb::Rect r; + rdr::U32 pixel; + + r = fb->getRect(); + r.tl.x += w() / 4; + r.tl.y += h() / 4; + r.br.x -= w() / 4; + r.br.y -= h() / 4; + + pixel = rand(); + fb->fillRect(r, &pixel); +} + +OverlayTestWindow::OverlayTestWindow() : + overlay(NULL), offscreen(NULL) +{ +} + +void OverlayTestWindow::start(int width, int height) +{ + PartialTestWindow::start(width, height); + + overlay = new Surface(400, 200); + overlay->clear(0xff, 0x80, 0x00, 0xcc); + + // X11 needs an off screen buffer for compositing to avoid flicker, + // and alpha blending doesn't work for windows on Win32 +#if !defined(__APPLE__) + offscreen = new Surface(w(), h()); +#else + offscreen = NULL; +#endif +} + +void OverlayTestWindow::stop() +{ + PartialTestWindow::stop(); + + delete offscreen; + offscreen = NULL; + delete overlay; + overlay = NULL; +} + +void OverlayTestWindow::draw() +{ + int ox, oy, ow, oh; + int X, Y, W, H; + + // We cannot update the damage region from inside the draw function, + // so delegate this to an idle function + Fl::add_idle(timer, this); + + // Check what actually needs updating + fl_clip_box(0, 0, w(), h(), X, Y, W, H); + if ((W == 0) || (H == 0)) + return; + + 
// We might get a redraw before we are fully ready + if (!overlay) + return; + + // Simplify the clip region to a simple rectangle in order to + // properly draw all the layers even if they only partially overlap + fl_push_no_clip(); + fl_push_clip(X, Y, W, H); + + if (offscreen) + fb->draw(offscreen, X, Y, X, Y, W, H); + else + fb->draw(X, Y, X, Y, W, H); + + pixels += W*H; + frames++; + + ox = (w() - overlay->width()) / 2; + oy = h() / 4 - overlay->height() / 2; + ow = overlay->width(); + oh = overlay->height(); + fl_clip_box(ox, oy, ow, oh, X, Y, W, H); + if ((W != 0) && (H != 0)) { + if (offscreen) + overlay->blend(offscreen, X - ox, Y - oy, X, Y, W, H); + else + overlay->blend(X - ox, Y - oy, X, Y, W, H); + } + + fl_pop_clip(); + fl_pop_clip(); + + if (offscreen) { + fl_clip_box(0, 0, w(), h(), X, Y, W, H); + offscreen->draw(X, Y, X, Y, W, H); + } +} + +static void dosubtest(TestWindow* win, int width, int height, + unsigned long long* pixels, + unsigned long long* frames, + double* time) +{ + struct timeval start; + + win->start(width, height); + + gettimeofday(&start, NULL); + while (rfb::msSince(&start) < 3000) + Fl::wait(); + + win->stop(); + + *pixels = win->pixels; + *frames = win->frames; + *time = win->time; +} + +static bool is_constant(double a, double b) +{ + return (fabs(a - b) / a) < 0.1; +} + +static void dotest(TestWindow* win) +{ + unsigned long long pixels[3]; + unsigned long long frames[3]; + double time[3]; + + double delay, rate; + char s[1024]; + + // Run the test several times at different resolutions... + dosubtest(win, 800, 600, &pixels[0], &frames[0], &time[0]); + dosubtest(win, 1024, 768, &pixels[1], &frames[1], &time[1]); + dosubtest(win, 1280, 960, &pixels[2], &frames[2], &time[2]); + + // ...in order to compute how much of the rendering time is static, + // and how much depends on the number of pixels + // (i.e. 
solve: time = delay * frames + rate * pixels) + delay = (((time[0] - (double)pixels[0] / pixels[1] * time[1]) / + (frames[0] - (double)pixels[0] / pixels[1] * frames[1])) + + ((time[1] - (double)pixels[1] / pixels[2] * time[2]) / + (frames[1] - (double)pixels[1] / pixels[2] * frames[2]))) / 2.0; + rate = (((time[0] - (double)frames[0] / frames[1] * time[1]) / + (pixels[0] - (double)frames[0] / frames[1] * pixels[1])) + + ((time[1] - (double)frames[1] / frames[2] * time[2]) / + (pixels[1] - (double)frames[1] / frames[2] * pixels[2]))) / 2.0; + + // However, we have some corner cases: + + // We are restricted by some delay, e.g. refresh rate + if (is_constant(frames[0]/time[0], frames[2]/time[2])) { + fprintf(stderr, "WARNING: Fixed delay dominating updates.\n\n"); + delay = time[2]/frames[2]; + rate = 0.0; + } + + // There isn't any fixed delay, we are only restricted by pixel + // throughput + if (fabs(delay) < 0.001) { + delay = 0.0; + rate = time[2]/pixels[2]; + } + + // We can hit cache limits that causes performance to drop + // with increasing update size, screwing up our calculations + if ((pixels[2] / time[2]) < (pixels[0] / time[0] * 0.9)) { + fprintf(stderr, "WARNING: Unexpected behaviour. 
Measurement unreliable.\n\n"); + + // We can't determine the proportions between these, so divide the + // time spent evenly + delay = time[2] / 2.0 / frames[2]; + rate = time[2] / 2.0 / pixels[2]; + } + + fprintf(stderr, "Rendering delay: %g ms/frame\n", delay * 1000.0); + if (rate == 0.0) + strcpy(s, "N/A pixels/s"); + else + rfb::siPrefix(1.0 / rate, "pixels/s", s, sizeof(s)); + fprintf(stderr, "Rendering rate: %s\n", s); + fprintf(stderr, "Maximum FPS: %g fps @ 1920x1080\n", + 1.0 / (delay + rate * 1920 * 1080)); +} + +int main(int argc, char** argv) +{ + TestWindow* win; + + fprintf(stderr, "Full window update:\n\n"); + win = new TestWindow(); + dotest(win); + delete win; + fprintf(stderr, "\n"); + + fprintf(stderr, "Partial window update:\n\n"); + win = new PartialTestWindow(); + dotest(win); + delete win; + fprintf(stderr, "\n"); + + fprintf(stderr, "Partial window update with overlay:\n\n"); + win = new OverlayTestWindow(); + dotest(win); + delete win; + fprintf(stderr, "\n"); + + return 0; +} diff --git a/tests/perf/results/multicore/README b/tests/perf/results/multicore/README new file mode 100644 index 00000000..c93b2d7a --- /dev/null +++ b/tests/perf/results/multicore/README @@ -0,0 +1,40 @@ +This directory contains the evaluation of the multi-core implementation +in the decoder. The baseline is the performance before the addition of +the DecodeManager class. + +Tests were performed on the following systems: + + - eLux RP Atom N270 1.6 GHz + - Lubuntu 13.10 i.MX6 Quad 1.2 GHz + - Fedora 22 i7-3770 3.4 GHz + - Windows Vista Core 2 Duo E7400 2.8 GHz + - Windows 10 i3-4170 3.7 GHz + - OS X 10.6 Core 2 Duo 2.53 GHz + - OS X 10.11 i5 2.3 GHz + +The systems were tested with: + + a) The old, baseline code + b) The new code with all CPUs enabled + c) The new code with only one CPU enabled + +The test itself consists of running decperf on the test files from the +TurboVNC project. Rate of decoding is then compared to the baseline. 
+Note that the CPU time is divided by core usage in the multi CPU cases +in order to derive total decoding time. This method is sensitive to +other load on the system. + +On average, there is no regression in performance for single CPU +systems. This however relies on the addition of the single CPU shortcut +in DecodeManager. Without that the performance sees a 10% lower rate. + +Dual CPU systems see between 20% and 50% increase, and the quad core +systems between 75% and 125% on average. OS X is an outlier though in +that it gets a mere 32% increase on average. It is unknown why at this +point and tracing doesn't reveal anything obvious. It may be because it +is not a true quad core system, but rather uses HyperThreading. + +So in summary, the new code can make a noticeable improvement in decoding +time. However it does so at a cost of efficiency. Four times the CPUs +only gives you about twice the performance. More improvements may be +possible. diff --git a/tests/perf/results/multicore/multicore.ods b/tests/perf/results/multicore/multicore.ods Binary files differ new file mode 100644 index 00000000..42e024d6 --- /dev/null +++ b/tests/perf/results/multicore/multicore.ods diff --git a/tests/perf/results/notrans/README b/tests/perf/results/notrans/README new file mode 100644 index 00000000..3723e67d --- /dev/null +++ b/tests/perf/results/notrans/README @@ -0,0 +1,28 @@ +This directory contains the test results in preparation for the removal +of the PixelTransformer class. + +Tests were performed on Linux with these CPUs: + + - Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz + - ARM i.MX6 DualLite @ 1 GHz + +The tests show that the new bufferFromBuffer() has similar performance +to PixelTransformer in most cases. It gets beaten in two cases: + + - Input format is 16 BPP and PixelTransformer is in the + non-economical mode (the default mode). + + - Input format is 8 BPP, mode irrelevant.
+ +PixelTransformer is about twice as fast in both these cases (more if +converting between two low colour formats). + +Although this is significant, it is in cases that are already difficult +to deal with performance-wise, and exceedingly rare with modern +hardware. As such it is difficult to motivate the extra complexity that +PixelTransformer requires. + +If it turns out that these cases are significant, then we can move +PixelTransformer's massive lookup tables into a shared cache in +PixelFormat. Implementation complexity would be similar, but at least +we would have a friendly API. diff --git a/tests/perf/results/notrans/armhf.csv b/tests/perf/results/notrans/armhf.csv new file mode 100644 index 00000000..3ccb6d9b --- /dev/null +++ b/tests/perf/results/notrans/armhf.csv @@ -0,0 +1,26 @@ +# Pixel Conversion Test 2014-07-09 14:23 UTC +# +# Frame buffer: 4096x4096 pixels +# Tile size: 64x64 pixels +# +# Note: Results are Mpixels/sec +# +Source format,Destination Format,memcpy,PixelTransformer,bufferFromBuffer,rgbFromBuffer,bufferFromRGB + +depth 24 (32bpp) little-endian rgb888,depth 24 (32bpp) little-endian rgb888,67.5908,67.3684,66.8189,54.3236,59.1908 +depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) little-endian rgb888,67.5908,33.2468,45.2097,54.3236,58.8506 +depth 16 (16bpp) little-endian rgb565,depth 24 (32bpp) little-endian rgb888,62.0606,24.6896,16.8076,22.5924,59.5349 +depth 7 (8bpp) rgb232,depth 24 (32bpp) little-endian rgb888,61.6867,63.2099,19.5887,23.5132,59.7956 + +depth 24 (32bpp) little-endian rgb888,depth 16 (16bpp) little-endian rgb565,96.8321,31.5562,40.3945,54.3236,25.0673 +depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) little-endian rgb565,104.757,31.7766,13.3725,22.5303,25.1443 +depth 7 (8bpp) rgb232,depth 16 (16bpp) little-endian rgb565,95.9251,61.594,15.1535,23.5132,25.098 + +depth 24 (32bpp) little-endian rgb888,depth 7 (8bpp) rgb232,133.42,34.5654,32.6115,54.3236,33.0056 +depth 16 (16bpp) little-endian rgb565,depth 7
(8bpp) rgb232,133.42,42.1833,15.7842,22.5303,32.7942 +depth 7 (8bpp) bgr232,depth 7 (8bpp) rgb232,137.45,75.7116,16.8699,23.5132,33.0056 + +depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) big-endian bgr888,67.5908,33.2738,45.2597,54.3957,58.9353 +depth 24 (32bpp) big-endian bgr888,depth 24 (32bpp) little-endian bgr888,67.7025,NaN,45.3097,54.1799,59.0202 +depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) big-endian bgr565,105.567,31.7766,13.8425,21.49,24.9756 +depth 16 (16bpp) big-endian bgr565,depth 16 (16bpp) little-endian bgr565,105.84,NaN,12.3746,20.9086,25.1443 diff --git a/tests/perf/results/notrans/i386.csv b/tests/perf/results/notrans/i386.csv new file mode 100644 index 00000000..bb9247d3 --- /dev/null +++ b/tests/perf/results/notrans/i386.csv @@ -0,0 +1,26 @@ +# Pixel Conversion Test 2014-07-09 14:14 UTC +# +# Frame buffer: 4096x4096 pixels +# Tile size: 64x64 pixels +# +# Note: Results are Mpixels/sec +# +Source format,Destination Format,memcpy,PixelTransformer,bufferFromBuffer,rgbFromBuffer,bufferFromRGB + +depth 24 (32bpp) little-endian rgb888,depth 24 (32bpp) little-endian rgb888,568.889,561.096,602.353,338.512,525.128 +depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) little-endian rgb888,561.096,215.579,280.548,338.512,525.128 +depth 16 (16bpp) little-endian rgb565,depth 24 (32bpp) little-endian rgb888,602.353,405.545,185.339,146.81,531.948 +depth 7 (8bpp) rgb232,depth 24 (32bpp) little-endian rgb888,640,531.948,192.3,170.667,546.133 + +depth 24 (32bpp) little-endian rgb888,depth 16 (16bpp) little-endian rgb565,853.333,217.872,235.402,344.202,256 +depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) little-endian rgb565,871.489,455.111,89.4323,146.81,256 +depth 7 (8bpp) rgb232,depth 16 (16bpp) little-endian rgb565,952.558,568.889,94.8148,167.184,251.288 + +depth 24 (32bpp) little-endian rgb888,depth 7 (8bpp) rgb232,1107.03,235.402,231.412,341.333,278.639 +depth 16 (16bpp) little-endian rgb565,depth 7 (8bpp) 
rgb232,1137.78,481.882,95.2558,144.735,276.757 +depth 7 (8bpp) bgr232,depth 7 (8bpp) rgb232,1204.71,553.514,101.136,169.256,280.548 + +depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) big-endian bgr888,568.889,212.228,276.757,338.512,525.128 +depth 24 (32bpp) big-endian bgr888,depth 24 (32bpp) little-endian bgr888,576.901,NaN,278.639,338.512,525.128 +depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) big-endian bgr565,890.435,455.111,89.4323,146.81,243.81 +depth 16 (16bpp) big-endian bgr565,depth 16 (16bpp) little-endian bgr565,871.489,NaN,85.3333,146.286,254.41 diff --git a/tests/perf/results/notrans/x86_64.csv b/tests/perf/results/notrans/x86_64.csv new file mode 100644 index 00000000..18fc03f3 --- /dev/null +++ b/tests/perf/results/notrans/x86_64.csv @@ -0,0 +1,26 @@ +# Pixel Conversion Test 2014-07-09 14:14 UTC +# +# Frame buffer: 4096x4096 pixels +# Tile size: 64x64 pixels +# +# Note: Results are Mpixels/sec +# +Source format,Destination Format,memcpy,PixelTransformer,bufferFromBuffer,rgbFromBuffer,bufferFromRGB + +depth 24 (32bpp) little-endian rgb888,depth 24 (32bpp) little-endian rgb888,576.901,576.901,546.133,338.512,602.353 +depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) little-endian rgb888,585.143,251.288,288.451,335.738,602.353 +depth 16 (16bpp) little-endian rgb565,depth 24 (32bpp) little-endian rgb888,585.143,405.545,205.829,162.54,620.606 +depth 7 (8bpp) rgb232,depth 24 (32bpp) little-endian rgb888,493.494,505.679,267.712,177.316,620.606 + +depth 24 (32bpp) little-endian rgb888,depth 16 (16bpp) little-endian rgb565,999.024,231.412,257.61,344.202,265.974 +depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) little-endian rgb565,975.238,455.111,101.638,165.161,267.712 +depth 7 (8bpp) rgb232,depth 16 (16bpp) little-endian rgb565,1050.26,576.901,105.296,181.239,269.474 + +depth 24 (32bpp) little-endian rgb888,depth 7 (8bpp) rgb232,1638.4,259.24,271.258,347.119,298.978 +depth 16 (16bpp) little-endian rgb565,depth 7 (8bpp) 
rgb232,1575.38,505.679,105.026,165.161,294.676 +depth 7 (8bpp) bgr232,depth 7 (8bpp) rgb232,1706.67,602.353,107.225,183.677,298.978 + +depth 24 (32bpp) little-endian bgr888,depth 24 (32bpp) big-endian bgr888,593.623,251.288,286.434,338.512,620.606 +depth 24 (32bpp) big-endian bgr888,depth 24 (32bpp) little-endian bgr888,593.623,NaN,282.483,344.202,611.343 +depth 16 (16bpp) little-endian bgr565,depth 16 (16bpp) big-endian bgr565,1050.26,450.11,97.7566,166.504,259.24 +depth 16 (16bpp) big-endian bgr565,depth 16 (16bpp) little-endian bgr565,999.024,NaN,96.6038,155.152,267.712 diff --git a/tests/perf/util.cxx b/tests/perf/util.cxx new file mode 100644 index 00000000..17a83698 --- /dev/null +++ b/tests/perf/util.cxx @@ -0,0 +1,178 @@ +/* Copyright 2013-2014 Pierre Ossman <ossman@cendio.se> for Cendio AB + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. 
+ */ + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#ifdef WIN32 +#include <windows.h> +#else +#include <sys/resource.h> +#include <sys/time.h> +#endif + +#include "util.h" + +#ifdef WIN32 +typedef struct { + FILETIME kernelTime; + FILETIME userTime; +} syscounter_t; +#else +typedef struct rusage syscounter_t; +#endif + +static syscounter_t _globalCounter[2]; +static cpucounter_t globalCounter = _globalCounter; + +void startCpuCounter(void) +{ + startCpuCounter(globalCounter); +} + +void endCpuCounter(void) +{ + endCpuCounter(globalCounter); +} + +double getCpuCounter(void) +{ + return getCpuCounter(globalCounter); +} + +cpucounter_t newCpuCounter(void) +{ + syscounter_t *c; + + c = (syscounter_t*)malloc(sizeof(syscounter_t) * 2); + if (c == NULL) + return NULL; + + memset(c, 0, sizeof(syscounter_t) * 2); + + return c; +} + +void freeCpuCounter(cpucounter_t c) +{ + free(c); +} + +static void measureCpu(syscounter_t *counter) +{ +#ifdef WIN32 + FILETIME dummy1, dummy2; + + GetProcessTimes(GetCurrentProcess(), &dummy1, &dummy2, + &counter->kernelTime, &counter->userTime); +#else + getrusage(RUSAGE_SELF, counter); +#endif +} + +void startCpuCounter(cpucounter_t c) +{ + syscounter_t *s = (syscounter_t*)c; + measureCpu(&s[0]); +} + +void endCpuCounter(cpucounter_t c) +{ + syscounter_t *s = (syscounter_t*)c; + measureCpu(&s[1]); +} + +double getCpuCounter(cpucounter_t c) +{ + syscounter_t *s = (syscounter_t*)c; + double sysSeconds, userSeconds; + +#ifdef WIN32 + uint64_t counters[2]; + + counters[0] = (uint64_t)s[0].kernelTime.dwHighDateTime << 32 | + s[0].kernelTime.dwLowDateTime; + counters[1] = (uint64_t)s[1].kernelTime.dwHighDateTime << 32 | + s[1].kernelTime.dwLowDateTime; + + sysSeconds = (double)(counters[1] - counters[0]) / 10000000.0; + + counters[0] = (uint64_t)s[0].userTime.dwHighDateTime << 32 | + s[0].userTime.dwLowDateTime; + counters[1] = (uint64_t)s[1].userTime.dwHighDateTime << 32 | + s[1].userTime.dwLowDateTime; + + userSeconds = 
(double)(counters[1] - counters[0]) / 10000000.0; +#else + sysSeconds = (double)(s[1].ru_stime.tv_sec - + s[0].ru_stime.tv_sec); + sysSeconds += (double)(s[1].ru_stime.tv_usec - + s[0].ru_stime.tv_usec) / 1000000.0; + + userSeconds = (double)(s[1].ru_utime.tv_sec - + s[0].ru_utime.tv_sec); + userSeconds += (double)(s[1].ru_utime.tv_usec - + s[0].ru_utime.tv_usec) / 1000000.0; +#endif + + return sysSeconds + userSeconds; +} + +#ifdef WIN32 +static LARGE_INTEGER timeStart, timeEnd; +#else +static struct timeval timeStart, timeEnd; +#endif + +void startTimeCounter(void) +{ +#ifdef WIN32 + QueryPerformanceCounter(&timeStart); +#else + gettimeofday(&timeStart, NULL); +#endif +} + +void endTimeCounter(void) +{ +#ifdef WIN32 + QueryPerformanceCounter(&timeEnd); +#else + gettimeofday(&timeEnd, NULL); +#endif +} + +double getTimeCounter(void) +{ + double time; + +#ifdef WIN32 + LARGE_INTEGER freq; + + QueryPerformanceFrequency(&freq); + + time = timeEnd.QuadPart - timeStart.QuadPart; + time = time / freq.QuadPart; +#else + time = (double)timeEnd.tv_sec - timeStart.tv_sec; + time += (double)(timeEnd.tv_usec - timeStart.tv_usec) / 1000000.0; +#endif + + return time; +} diff --git a/tests/perf/util.h b/tests/perf/util.h new file mode 100644 index 00000000..2b8ab4a8 --- /dev/null +++ b/tests/perf/util.h @@ -0,0 +1,42 @@ +/* Copyright 2013-2014 Pierre Ossman <ossman@cendio.se> for Cendio AB + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + */ + +#ifndef __TESTS_UTIL_H__ +#define __TESTS_UTIL_H__ + +typedef void* cpucounter_t; + +void startCpuCounter(void); +void endCpuCounter(void); + +double getCpuCounter(void); + +cpucounter_t newCpuCounter(void); +void freeCpuCounter(cpucounter_t c); + +void startCpuCounter(cpucounter_t c); +void endCpuCounter(cpucounter_t c); + +double getCpuCounter(cpucounter_t c); + +void startTimeCounter(void); +void endTimeCounter(void); + +double getTimeCounter(void); + +#endif |