summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShawn O. Pearce <spearce@spearce.org>2010-10-10 13:36:30 -0700
committerChris Aniszczyk <caniszczyk@gmail.com>2010-10-11 16:20:00 -0500
commit14da6e0b9d19ec7e0cb9b3427044e178f751a59c (patch)
treeb2cab3578be39ccad367e6c8a49636fcf445d89b
parent7429a9a5aa18ee9c7192413f5e2c91f9d997f61e (diff)
downloadjgit-14da6e0b9d19ec7e0cb9b3427044e178f751a59c.tar.gz
jgit-14da6e0b9d19ec7e0cb9b3427044e178f751a59c.zip
debug-diff-algorithms: Real world performance test implementations
When working on a difference algorithm's implementation, its generally more important to care about how it behaves on real-world inputs than it does on fake inputs created for unit test cases. Run each implementation against a number of real-world repositories, looking at changes between files in each commit. This gives a better picture of how a particular algorithm performs. This test suite run against JGit and linux-2.6 with the current available algorithms shows HistogramDiff always out-performs MyersDiff, and by a wide margin on the linux-2.6 sources. As HistogramDiff has similar output properties as PatienceDiff, the resulting edits are probably also more human-readable. These test results show that HistogramDiff is a good choice for the default implementation, and also show that PatienceDiff isn't worth keeping. jgit: start at baa83ae 2686 files, 760 commits N= 3 min lines, 3016 max lines Algorithm Time(ns) ( Time(ns) on Time(ns) on ) ( N=3 N=3016 ) --------------------------------------------------------------------- histogram_myers 314652100 ( 3900 298100 ) histogram 315973000 ( 3800 302100 ) patience 774724900 ( 4500 347900 ) patience_histogram_myers 786332800 ( 3700 351200 ) myers 819359300 ( 4100 379100 ) patience_myers 843416700 ( 3800 348000 ) linux-2.6.git: start at 85a3318 4001 files, 2680 commits N= 2 min lines, 39098 max lines Algorithm Time(ns) ( Time(ns) on Time(ns) on ) ( N=2 N=39098 ) --------------------------------------------------------------------- histogram_myers 1229870000 ( 5900 2642700 ) histogram 1235654100 ( 6000 2695400 ) patience 3856546000 ( 5900 2627700 ) patience_histogram_myers 3866728100 ( 7000 2624000 ) patience_myers 4004875300 ( 8000 2651700 ) myers 9794679000 ( 7200 2716200 ) Change-Id: I2502684d31f7851e720356820d04d8cf767f7229 Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
-rw-r--r--org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin1
-rw-r--r--org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/DiffAlgorithms.java400
2 files changed, 401 insertions, 0 deletions
diff --git a/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin b/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin
index c63214776b..d98ed113c6 100644
--- a/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin
+++ b/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin
@@ -24,6 +24,7 @@ org.eclipse.jgit.pgm.Tag
org.eclipse.jgit.pgm.UploadPack
org.eclipse.jgit.pgm.Version
+org.eclipse.jgit.pgm.debug.DiffAlgorithms
org.eclipse.jgit.pgm.debug.MakeCacheTree
org.eclipse.jgit.pgm.debug.ReadDirCache
org.eclipse.jgit.pgm.debug.RebuildCommitGraph
diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/DiffAlgorithms.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/DiffAlgorithms.java
new file mode 100644
index 0000000000..ec329fc0f6
--- /dev/null
+++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/DiffAlgorithms.java
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.pgm.debug;
+
+import java.io.File;
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadMXBean;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.eclipse.jgit.diff.DiffAlgorithm;
+import org.eclipse.jgit.diff.HistogramDiff;
+import org.eclipse.jgit.diff.MyersDiff;
+import org.eclipse.jgit.diff.PatienceDiff;
+import org.eclipse.jgit.diff.RawText;
+import org.eclipse.jgit.diff.RawTextComparator;
+import org.eclipse.jgit.errors.LargeObjectException;
+import org.eclipse.jgit.lib.AbbreviatedObjectId;
+import org.eclipse.jgit.lib.AnyObjectId;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.FileMode;
+import org.eclipse.jgit.lib.MutableObjectId;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectReader;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.lib.RepositoryBuilder;
+import org.eclipse.jgit.lib.RepositoryCache;
+import org.eclipse.jgit.pgm.CLIText;
+import org.eclipse.jgit.pgm.TextBuiltin;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.revwalk.RevWalk;
+import org.eclipse.jgit.treewalk.TreeWalk;
+import org.eclipse.jgit.treewalk.filter.TreeFilter;
+import org.eclipse.jgit.util.FS;
+import org.kohsuke.args4j.Option;
+
+class DiffAlgorithms extends TextBuiltin {
+
+ final Algorithm myers = new Algorithm() {
+ DiffAlgorithm create() {
+ return MyersDiff.INSTANCE;
+ }
+ };
+
+ final Algorithm histogram = new Algorithm() {
+ DiffAlgorithm create() {
+ HistogramDiff d = new HistogramDiff();
+ d.setFallbackAlgorithm(null);
+ return d;
+ }
+ };
+
+ final Algorithm histogram_myers = new Algorithm() {
+ DiffAlgorithm create() {
+ HistogramDiff d = new HistogramDiff();
+ d.setFallbackAlgorithm(MyersDiff.INSTANCE);
+ return d;
+ }
+ };
+
+ final Algorithm patience = new Algorithm() {
+ DiffAlgorithm create() {
+ PatienceDiff d = new PatienceDiff();
+ d.setFallbackAlgorithm(null);
+ return d;
+ }
+ };
+
+ final Algorithm patience_myers = new Algorithm() {
+ DiffAlgorithm create() {
+ PatienceDiff d = new PatienceDiff();
+ d.setFallbackAlgorithm(MyersDiff.INSTANCE);
+ return d;
+ }
+ };
+
+ final Algorithm patience_histogram_myers = new Algorithm() {
+ DiffAlgorithm create() {
+ HistogramDiff d2 = new HistogramDiff();
+ d2.setFallbackAlgorithm(MyersDiff.INSTANCE);
+ PatienceDiff d1 = new PatienceDiff();
+ d1.setFallbackAlgorithm(d2);
+ return d1;
+ }
+ };
+
+ // -----------------------------------------------------------------------
+ //
+ // Implementation of the suite lives below this line.
+ //
+ //
+
+ @Option(name = "--algorithm", multiValued = true, metaVar = "NAME", usage = "Enable algorithm(s)")
+ List<String> algorithms = new ArrayList<String>();
+
+ @Option(name = "--text-limit", metaVar = "LIMIT", usage = "Maximum size in KiB to scan per file revision")
+ int textLimit = 15 * 1024; // 15 MiB as later we do * 1024.
+
+ @Option(name = "--repository", aliases = { "-r" }, multiValued = true, metaVar = "GIT_DIR", usage = "Repository to scan")
+ List<File> gitDirs = new ArrayList<File>();
+
+ @Option(name = "--count", metaVar = "LIMIT", usage = "Number of file revisions to be compared")
+ int count = 0; // unlimited
+
+ private final RawTextComparator cmp = RawTextComparator.DEFAULT;
+
+ private ThreadMXBean mxBean;
+
+ @Override
+ protected boolean requiresRepository() {
+ return false;
+ }
+
+ @Override
+ protected void run() throws Exception {
+ mxBean = ManagementFactory.getThreadMXBean();
+ if (!mxBean.isCurrentThreadCpuTimeSupported())
+ throw die("Current thread CPU time not supported on this JRE");
+
+ if (gitDirs.isEmpty()) {
+ RepositoryBuilder rb = new RepositoryBuilder() //
+ .setGitDir(gitdir) //
+ .readEnvironment() //
+ .findGitDir();
+ if (rb.getGitDir() == null)
+ throw die(CLIText.get().cantFindGitDirectory);
+ gitDirs.add(rb.getGitDir());
+ }
+
+ for (File dir : gitDirs) {
+ RepositoryBuilder rb = new RepositoryBuilder();
+ if (RepositoryCache.FileKey.isGitRepository(dir, FS.DETECTED))
+ rb.setGitDir(dir);
+ else
+ rb.findGitDir(dir);
+
+ Repository db = rb.build();
+ try {
+ run(db);
+ } finally {
+ db.close();
+ }
+ }
+ }
+
+ private void run(Repository db) throws Exception {
+ List<Test> all = init();
+
+ long files = 0;
+ int commits = 0;
+ int minN = Integer.MAX_VALUE;
+ int maxN = 0;
+
+ AbbreviatedObjectId startId;
+ ObjectReader or = db.newObjectReader();
+ try {
+ final MutableObjectId id = new MutableObjectId();
+ RevWalk rw = new RevWalk(or);
+ TreeWalk tw = new TreeWalk(or);
+ tw.setFilter(TreeFilter.ANY_DIFF);
+ tw.setRecursive(true);
+
+ ObjectId start = db.resolve(Constants.HEAD);
+ startId = or.abbreviate(start);
+ rw.markStart(rw.parseCommit(start));
+ for (;;) {
+ final RevCommit c = rw.next();
+ if (c == null)
+ break;
+ commits++;
+ if (c.getParentCount() != 1)
+ continue;
+
+ RevCommit p = c.getParent(0);
+ rw.parseHeaders(p);
+ tw.reset(new AnyObjectId[] { p.getTree(), c.getTree() });
+ while (tw.next()) {
+ if (!isFile(tw, 0) || !isFile(tw, 1))
+ continue;
+
+ byte[] raw0;
+ try {
+ tw.getObjectId(id, 0);
+ raw0 = or.open(id).getCachedBytes(textLimit * 1024);
+ } catch (LargeObjectException tooBig) {
+ continue;
+ }
+ if (RawText.isBinary(raw0))
+ continue;
+
+ byte[] raw1;
+ try {
+ tw.getObjectId(id, 1);
+ raw1 = or.open(id).getCachedBytes(textLimit * 1024);
+ } catch (LargeObjectException tooBig) {
+ continue;
+ }
+ if (RawText.isBinary(raw1))
+ continue;
+
+ RawText txt0 = new RawText(raw0);
+ RawText txt1 = new RawText(raw1);
+
+ minN = Math.min(minN, txt0.size() + txt1.size());
+ maxN = Math.max(maxN, txt0.size() + txt1.size());
+
+ for (Test test : all)
+ testOne(test, txt0, txt1);
+ files++;
+ }
+ if (count > 0 && files > count)
+ break;
+ }
+ } finally {
+ or.release();
+ }
+
+ Collections.sort(all, new Comparator<Test>() {
+ public int compare(Test a, Test b) {
+ int cmp = Long.signum(a.runningTimeNanos - b.runningTimeNanos);
+ if (cmp == 0)
+ cmp = a.algorithm.name.compareTo(b.algorithm.name);
+ return cmp;
+ }
+ });
+
+ if (db.getDirectory() != null) {
+ String name = db.getDirectory().getName();
+ File parent = db.getDirectory().getParentFile();
+ if (name.equals(Constants.DOT_GIT_EXT) && parent != null)
+ name = parent.getName();
+ out.println(name + ": start at " + startId.name());
+ }
+
+ out.format(" %12d files, %8d commits\n", files, commits);
+ out.format(" N=%10d min lines, %8d max lines\n", minN, maxN);
+
+ out.format("%-25s %12s ( %12s %12s )\n", //
+ "Algorithm", "Time(ns)", "Time(ns) on", "Time(ns) on");
+ out.format("%-25s %12s ( %12s %12s )\n", //
+ "", "", "N=" + minN, "N=" + maxN);
+ out.println("-----------------------------------------------------"
+ + "----------------");
+
+ for (Test test : all) {
+ out.format("%-25s %12d ( %12d %12d )", //
+ test.algorithm.name, //
+ test.runningTimeNanos, //
+ test.minN.runningTimeNanos, //
+ test.maxN.runningTimeNanos);
+ out.println();
+ }
+ out.println();
+ out.flush();
+ }
+
+ private static boolean isFile(TreeWalk tw, int ithTree) {
+ FileMode fm = tw.getFileMode(ithTree);
+ return FileMode.REGULAR_FILE.equals(fm)
+ || FileMode.EXECUTABLE_FILE.equals(fm);
+ }
+
+ private static final int minCPUTimerTicks = 10;
+
+ private void testOne(Test test, RawText a, RawText b) {
+ final DiffAlgorithm da = test.algorithm.create();
+ int cpuTimeChanges = 0;
+ int cnt = 0;
+
+ final long startTime = mxBean.getCurrentThreadCpuTime();
+ long lastTime = startTime;
+ while (cpuTimeChanges < minCPUTimerTicks) {
+ da.diff(cmp, a, b);
+ cnt++;
+
+ long interimTime = mxBean.getCurrentThreadCpuTime();
+ if (interimTime != lastTime) {
+ cpuTimeChanges++;
+ lastTime = interimTime;
+ }
+ }
+ final long stopTime = mxBean.getCurrentThreadCpuTime();
+ final long runTime = (stopTime - startTime) / cnt;
+
+ test.runningTimeNanos += runTime;
+
+ if (test.minN == null || a.size() + b.size() < test.minN.n) {
+ test.minN = new Run();
+ test.minN.n = a.size() + b.size();
+ test.minN.runningTimeNanos = runTime;
+ }
+
+ if (test.maxN == null || a.size() + b.size() > test.maxN.n) {
+ test.maxN = new Run();
+ test.maxN.n = a.size() + b.size();
+ test.maxN.runningTimeNanos = runTime;
+ }
+ }
+
+ private List<Test> init() {
+ List<Test> all = new ArrayList<Test>();
+
+ try {
+ for (Field f : DiffAlgorithms.class.getDeclaredFields()) {
+ if (f.getType() == Algorithm.class) {
+ f.setAccessible(true);
+ Algorithm alg = (Algorithm) f.get(this);
+ alg.name = f.getName();
+ if (included(alg.name, algorithms)) {
+ Test test = new Test();
+ test.algorithm = alg;
+ all.add(test);
+ }
+ }
+ }
+ } catch (IllegalArgumentException e) {
+ throw die("Cannot determine names", e);
+ } catch (IllegalAccessException e) {
+ throw die("Cannot determine names", e);
+ }
+
+ return all;
+ }
+
+ private static boolean included(String name, List<String> want) {
+ if (want.isEmpty())
+ return true;
+ for (String s : want) {
+ if (s.equalsIgnoreCase(name))
+ return true;
+ }
+ return false;
+ }
+
+ private static abstract class Algorithm {
+ String name;
+
+ abstract DiffAlgorithm create();
+ }
+
+ private static class Test {
+ Algorithm algorithm;
+
+ long runningTimeNanos;
+
+ Run minN;
+
+ Run maxN;
+ }
+
+ private static class Run {
+ int n;
+
+ long runningTimeNanos;
+ }
+}