diff options
author | Shawn O. Pearce <spearce@spearce.org> | 2010-10-10 13:36:30 -0700 |
---|---|---|
committer | Chris Aniszczyk <caniszczyk@gmail.com> | 2010-10-11 16:20:00 -0500 |
commit | 14da6e0b9d19ec7e0cb9b3427044e178f751a59c (patch) | |
tree | b2cab3578be39ccad367e6c8a49636fcf445d89b | |
parent | 7429a9a5aa18ee9c7192413f5e2c91f9d997f61e (diff) | |
download | jgit-14da6e0b9d19ec7e0cb9b3427044e178f751a59c.tar.gz jgit-14da6e0b9d19ec7e0cb9b3427044e178f751a59c.zip |
debug-diff-algorithms: Real world performance test implementations
When working on a difference algorithm's implementation, its generally
more important to care about how it behaves on real-world inputs than
it does on fake inputs created for unit test cases. Run each
implementation against a number of real-world repositories, looking at
changes between files in each commit. This gives a better picture of
how a particular algorithm performs.
This test suite run against JGit and linux-2.6 with the current
available algorithms shows HistogramDiff always out-performs
MyersDiff, and by a wide margin on the linux-2.6 sources. As
HistogramDiff has similar output properties as PatienceDiff, the
resulting edits are probably also more human-readable. These test
results show that HistogramDiff is a good choice for the default
implementation, and also show that PatienceDiff isn't worth keeping.
jgit: start at baa83ae
2686 files, 760 commits
N= 3 min lines, 3016 max lines
Algorithm Time(ns) ( Time(ns) on Time(ns) on )
( N=3 N=3016 )
---------------------------------------------------------------------
histogram_myers 314652100 ( 3900 298100 )
histogram 315973000 ( 3800 302100 )
patience 774724900 ( 4500 347900 )
patience_histogram_myers 786332800 ( 3700 351200 )
myers 819359300 ( 4100 379100 )
patience_myers 843416700 ( 3800 348000 )
linux-2.6.git: start at 85a3318
4001 files, 2680 commits
N= 2 min lines, 39098 max lines
Algorithm Time(ns) ( Time(ns) on Time(ns) on )
( N=2 N=39098 )
---------------------------------------------------------------------
histogram_myers 1229870000 ( 5900 2642700 )
histogram 1235654100 ( 6000 2695400 )
patience 3856546000 ( 5900 2627700 )
patience_histogram_myers 3866728100 ( 7000 2624000 )
patience_myers 4004875300 ( 8000 2651700 )
myers 9794679000 ( 7200 2716200 )
Change-Id: I2502684d31f7851e720356820d04d8cf767f7229
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
-rw-r--r-- | org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin | 1 | ||||
-rw-r--r-- | org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/DiffAlgorithms.java | 400 |
2 files changed, 401 insertions, 0 deletions
diff --git a/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin b/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin index c63214776b..d98ed113c6 100644 --- a/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin +++ b/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin @@ -24,6 +24,7 @@ org.eclipse.jgit.pgm.Tag org.eclipse.jgit.pgm.UploadPack org.eclipse.jgit.pgm.Version +org.eclipse.jgit.pgm.debug.DiffAlgorithms org.eclipse.jgit.pgm.debug.MakeCacheTree org.eclipse.jgit.pgm.debug.ReadDirCache org.eclipse.jgit.pgm.debug.RebuildCommitGraph diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/DiffAlgorithms.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/DiffAlgorithms.java new file mode 100644 index 0000000000..ec329fc0f6 --- /dev/null +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/DiffAlgorithms.java @@ -0,0 +1,400 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.pgm.debug; + +import java.io.File; +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadMXBean; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import org.eclipse.jgit.diff.DiffAlgorithm; +import org.eclipse.jgit.diff.HistogramDiff; +import org.eclipse.jgit.diff.MyersDiff; +import org.eclipse.jgit.diff.PatienceDiff; +import org.eclipse.jgit.diff.RawText; +import org.eclipse.jgit.diff.RawTextComparator; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.MutableObjectId; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.lib.RepositoryBuilder; +import org.eclipse.jgit.lib.RepositoryCache; +import org.eclipse.jgit.pgm.CLIText; +import org.eclipse.jgit.pgm.TextBuiltin; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.treewalk.TreeWalk; +import org.eclipse.jgit.treewalk.filter.TreeFilter; +import org.eclipse.jgit.util.FS; +import org.kohsuke.args4j.Option; + +class DiffAlgorithms extends TextBuiltin { + + final Algorithm myers = new Algorithm() { + DiffAlgorithm create() { + return MyersDiff.INSTANCE; + } + }; + + final Algorithm histogram = new Algorithm() { + DiffAlgorithm create() { + HistogramDiff d = new HistogramDiff(); + d.setFallbackAlgorithm(null); + return d; + } + }; + + final Algorithm histogram_myers = new Algorithm() { + DiffAlgorithm create() { + HistogramDiff d = new HistogramDiff(); + d.setFallbackAlgorithm(MyersDiff.INSTANCE); + return d; + } + }; + + final Algorithm patience = new Algorithm() { + DiffAlgorithm create() { + PatienceDiff d = new PatienceDiff(); + d.setFallbackAlgorithm(null); + return d; + } + }; + + final Algorithm patience_myers = new Algorithm() { + DiffAlgorithm create() { + PatienceDiff d = new PatienceDiff(); + d.setFallbackAlgorithm(MyersDiff.INSTANCE); + return d; + } + }; + + final Algorithm patience_histogram_myers = new Algorithm() { + DiffAlgorithm create() { + HistogramDiff d2 = new HistogramDiff(); + d2.setFallbackAlgorithm(MyersDiff.INSTANCE); + PatienceDiff d1 = new PatienceDiff(); + d1.setFallbackAlgorithm(d2); + return d1; + } + }; + + // ----------------------------------------------------------------------- + // + // Implementation of the suite lives below this line. + // + // + + @Option(name = "--algorithm", multiValued = true, metaVar = "NAME", usage = "Enable algorithm(s)") + List<String> algorithms = new ArrayList<String>(); + + @Option(name = "--text-limit", metaVar = "LIMIT", usage = "Maximum size in KiB to scan per file revision") + int textLimit = 15 * 1024; // 15 MiB as later we do * 1024. + + @Option(name = "--repository", aliases = { "-r" }, multiValued = true, metaVar = "GIT_DIR", usage = "Repository to scan") + List<File> gitDirs = new ArrayList<File>(); + + @Option(name = "--count", metaVar = "LIMIT", usage = "Number of file revisions to be compared") + int count = 0; // unlimited + + private final RawTextComparator cmp = RawTextComparator.DEFAULT; + + private ThreadMXBean mxBean; + + @Override + protected boolean requiresRepository() { + return false; + } + + @Override + protected void run() throws Exception { + mxBean = ManagementFactory.getThreadMXBean(); + if (!mxBean.isCurrentThreadCpuTimeSupported()) + throw die("Current thread CPU time not supported on this JRE"); + + if (gitDirs.isEmpty()) { + RepositoryBuilder rb = new RepositoryBuilder() // + .setGitDir(gitdir) // + .readEnvironment() // + .findGitDir(); + if (rb.getGitDir() == null) + throw die(CLIText.get().cantFindGitDirectory); + gitDirs.add(rb.getGitDir()); + } + + for (File dir : gitDirs) { + RepositoryBuilder rb = new RepositoryBuilder(); + if (RepositoryCache.FileKey.isGitRepository(dir, FS.DETECTED)) + rb.setGitDir(dir); + else + rb.findGitDir(dir); + + Repository db = rb.build(); + try { + run(db); + } finally { + db.close(); + } + } + } + + private void run(Repository db) throws Exception { + List<Test> all = init(); + + long files = 0; + int commits = 0; + int minN = Integer.MAX_VALUE; + int maxN = 0; + + AbbreviatedObjectId startId; + ObjectReader or = db.newObjectReader(); + try { + final MutableObjectId id = new MutableObjectId(); + RevWalk rw = new RevWalk(or); + TreeWalk tw = new TreeWalk(or); + tw.setFilter(TreeFilter.ANY_DIFF); + tw.setRecursive(true); + + ObjectId start = db.resolve(Constants.HEAD); + startId = or.abbreviate(start); + rw.markStart(rw.parseCommit(start)); + for (;;) { + final RevCommit c = rw.next(); + if (c == null) + break; + commits++; + if (c.getParentCount() != 1) + continue; + + RevCommit p = c.getParent(0); + rw.parseHeaders(p); + tw.reset(new AnyObjectId[] { p.getTree(), c.getTree() }); + while (tw.next()) { + if (!isFile(tw, 0) || !isFile(tw, 1)) + continue; + + byte[] raw0; + try { + tw.getObjectId(id, 0); + raw0 = or.open(id).getCachedBytes(textLimit * 1024); + } catch (LargeObjectException tooBig) { + continue; + } + if (RawText.isBinary(raw0)) + continue; + + byte[] raw1; + try { + tw.getObjectId(id, 1); + raw1 = or.open(id).getCachedBytes(textLimit * 1024); + } catch (LargeObjectException tooBig) { + continue; + } + if (RawText.isBinary(raw1)) + continue; + + RawText txt0 = new RawText(raw0); + RawText txt1 = new RawText(raw1); + + minN = Math.min(minN, txt0.size() + txt1.size()); + maxN = Math.max(maxN, txt0.size() + txt1.size()); + + for (Test test : all) + testOne(test, txt0, txt1); + files++; + } + if (count > 0 && files > count) + break; + } + } finally { + or.release(); + } + + Collections.sort(all, new Comparator<Test>() { + public int compare(Test a, Test b) { + int cmp = Long.signum(a.runningTimeNanos - b.runningTimeNanos); + if (cmp == 0) + cmp = a.algorithm.name.compareTo(b.algorithm.name); + return cmp; + } + }); + + if (db.getDirectory() != null) { + String name = db.getDirectory().getName(); + File parent = db.getDirectory().getParentFile(); + if (name.equals(Constants.DOT_GIT_EXT) && parent != null) + name = parent.getName(); + out.println(name + ": start at " + startId.name()); + } + + out.format(" %12d files, %8d commits\n", files, commits); + out.format(" N=%10d min lines, %8d max lines\n", minN, maxN); + + out.format("%-25s %12s ( %12s %12s )\n", // + "Algorithm", "Time(ns)", "Time(ns) on", "Time(ns) on"); + out.format("%-25s %12s ( %12s %12s )\n", // + "", "", "N=" + minN, "N=" + maxN); + out.println("-----------------------------------------------------" + + "----------------"); + + for (Test test : all) { + out.format("%-25s %12d ( %12d %12d )", // + test.algorithm.name, // + test.runningTimeNanos, // + test.minN.runningTimeNanos, // + test.maxN.runningTimeNanos); + out.println(); + } + out.println(); + out.flush(); + } + + private static boolean isFile(TreeWalk tw, int ithTree) { + FileMode fm = tw.getFileMode(ithTree); + return FileMode.REGULAR_FILE.equals(fm) + || FileMode.EXECUTABLE_FILE.equals(fm); + } + + private static final int minCPUTimerTicks = 10; + + private void testOne(Test test, RawText a, RawText b) { + final DiffAlgorithm da = test.algorithm.create(); + int cpuTimeChanges = 0; + int cnt = 0; + + final long startTime = mxBean.getCurrentThreadCpuTime(); + long lastTime = startTime; + while (cpuTimeChanges < minCPUTimerTicks) { + da.diff(cmp, a, b); + cnt++; + + long interimTime = mxBean.getCurrentThreadCpuTime(); + if (interimTime != lastTime) { + cpuTimeChanges++; + lastTime = interimTime; + } + } + final long stopTime = mxBean.getCurrentThreadCpuTime(); + final long runTime = (stopTime - startTime) / cnt; + + test.runningTimeNanos += runTime; + + if (test.minN == null || a.size() + b.size() < test.minN.n) { + test.minN = new Run(); + test.minN.n = a.size() + b.size(); + test.minN.runningTimeNanos = runTime; + } + + if (test.maxN == null || a.size() + b.size() > test.maxN.n) { + test.maxN = new Run(); + test.maxN.n = a.size() + b.size(); + test.maxN.runningTimeNanos = runTime; + } + } + + private List<Test> init() { + List<Test> all = new ArrayList<Test>(); + + try { + for (Field f : DiffAlgorithms.class.getDeclaredFields()) { + if (f.getType() == Algorithm.class) { + f.setAccessible(true); + Algorithm alg = (Algorithm) f.get(this); + alg.name = f.getName(); + if (included(alg.name, algorithms)) { + Test test = new Test(); + test.algorithm = alg; + all.add(test); + } + } + } + } catch (IllegalArgumentException e) { + throw die("Cannot determine names", e); + } catch (IllegalAccessException e) { + throw die("Cannot determine names", e); + } + + return all; + } + + private static boolean included(String name, List<String> want) { + if (want.isEmpty()) + return true; + for (String s : want) { + if (s.equalsIgnoreCase(name)) + return true; + } + return false; + } + + private static abstract class Algorithm { + String name; + + abstract DiffAlgorithm create(); + } + + private static class Test { + Algorithm algorithm; + + long runningTimeNanos; + + Run minN; + + Run maxN; + } + + private static class Run { + int n; + + long runningTimeNanos; + } +} |