diff options
author | Shawn O. Pearce <spearce@spearce.org> | 2010-07-16 10:22:15 -0700 |
---|---|---|
committer | Shawn O. Pearce <spearce@spearce.org> | 2010-07-16 10:22:15 -0700 |
commit | 6e155d5f415e7f62f3f25e082dbee558e5be0b2d (patch) | |
tree | 3e793afd4e2d1cdce5c6cd55c0ebfdcf51c088d1 | |
parent | 0b46e70155e1a14edc77f32e030094fb499887cf (diff) | |
parent | 31311cacfd311f9a78bdf012a9f6ba1907b9964a (diff) | |
download | jgit-6e155d5f415e7f62f3f25e082dbee558e5be0b2d.tar.gz jgit-6e155d5f415e7f62f3f25e082dbee558e5be0b2d.zip |
Merge branch 'js/rename'
* js/rename:
Implemented file path based tie breaking to exact rename detection
Added more test cases for RenameDetector
Added very small optimization to exact rename detection
Fixed Misleading Javadoc
Added file path similarity to scoring metric in rename detection
Fixed potential div by zero bug
Added file size based rename detection optimization
Create FileHeader from DiffEntry
log: Implement --follow
Cache the diff configuration section
log: Add whitespace ignore options
Format submodule links during differences
Redo DiffFormatter API to be easier to use
log, diff: Add rename detection support
Implement similarity based rename detection
Added a preliminary version of rename detection
Refactored code out of FileHeader to facilitate rename detection
30 files changed, 3341 insertions, 307 deletions
diff --git a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties index d13c47d297..e879d6b605 100644 --- a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties +++ b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties @@ -66,7 +66,9 @@ metaVar_directory=DIRECTORY metaVar_file=FILE metaVar_gitDir=GIT_DIR metaVar_hostName=HOSTNAME +metaVar_linesOfContext=lines metaVar_message=message +metaVar_n=n metaVar_name=name metaVar_object=object metaVar_op=OP @@ -139,6 +141,7 @@ usage_cloneRepositoryIntoNewDir=Clone a repository into a new directory usage_configureTheServiceInDaemonServicename=configure the service in daemon.servicename usage_deleteBranchEvenIfNotMerged=delete branch (even if not merged) usage_deleteFullyMergedBranch=delete fully merged branch +usage_detectRenames=detect renamed files usage_directoriesToExport=directories to export usage_disableTheServiceInAllRepositories=disable the service in all repositories usage_displayAListOfAllRegisteredJgitCommands=Display a list of all registered jgit commands @@ -160,6 +163,7 @@ usage_listBothRemoteTrackingAndLocalBranches=list both remote-tracking and local usage_listCreateOrDeleteBranches=List, create, or delete branches usage_logAllPretty=format:%H %ct %P' output=log --all '--pretty=format:%H %ct %P' output usage_moveRenameABranch=move/rename a branch +usage_nameStatus=show only name and status of files usage_outputFile=Output file usage_path=path usage_performFsckStyleChecksOnReceive=perform fsck style checks on receive @@ -168,8 +172,10 @@ usage_produceAnEclipseIPLog=Produce an Eclipse IP log usage_pruneStaleTrackingRefs=prune stale tracking refs usage_recurseIntoSubtrees=recurse into subtrees usage_recordChangesToRepository=Record changes to the repository +usage_renameLimit=limit size of rename matrix usage_setTheGitRepositoryToOperateOn=set the git repository to operate on usage_showRefNamesMatchingCommits=Show ref names matching commits +usage_showPatch=display patch usage_symbolicVersionForTheProject=Symbolic version for the project usage_synchronizeIPZillaData=Synchronize IPZilla data usage_tagMessage=tag message diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java index fc1e400ab0..77ed73048d 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java @@ -45,31 +45,34 @@ package org.eclipse.jgit.pgm; +import java.io.BufferedOutputStream; import java.io.IOException; -import java.io.PrintStream; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.List; -import org.kohsuke.args4j.Argument; -import org.kohsuke.args4j.Option; - +import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.diff.DiffFormatter; -import org.eclipse.jgit.diff.MyersDiff; -import org.eclipse.jgit.diff.RawText; import org.eclipse.jgit.diff.RawTextIgnoreAllWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreLeadingWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreTrailingWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreWhitespaceChange; -import org.eclipse.jgit.lib.FileMode; -import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.diff.RenameDetector; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.TextProgressMonitor; import org.eclipse.jgit.pgm.opt.PathTreeFilterHandler; import org.eclipse.jgit.treewalk.AbstractTreeIterator; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.filter.AndTreeFilter; import org.eclipse.jgit.treewalk.filter.TreeFilter; +import org.kohsuke.args4j.Argument; +import org.kohsuke.args4j.Option; @Command(common = true, usage = "usage_ShowDiffs") class Diff extends TextBuiltin { + private final DiffFormatter diffFmt = new DiffFormatter( // + new BufferedOutputStream(System.out)); + @Argument(index = 0, metaVar = "metaVar_treeish", required = true) void tree_0(final AbstractTreeIterator c) { trees.add(c); @@ -78,25 +81,101 @@ class Diff extends TextBuiltin { @Argument(index = 1, metaVar = "metaVar_treeish", required = true) private final List<AbstractTreeIterator> trees = new ArrayList<AbstractTreeIterator>(); - @Option(name = "--", metaVar = "metaVar_port", multiValued = true, handler = PathTreeFilterHandler.class) + @Option(name = "--", metaVar = "metaVar_paths", multiValued = true, handler = PathTreeFilterHandler.class) private TreeFilter pathFilter = TreeFilter.ALL; + // BEGIN -- Options shared with Log + @Option(name = "-p", usage = "usage_showPatch") + boolean showPatch; + + @Option(name = "-M", usage = "usage_detectRenames") + private boolean detectRenames; + + @Option(name = "-l", usage = "usage_renameLimit") + private Integer renameLimit; + + @Option(name = "--name-status", usage = "usage_nameStatus") + private boolean showNameAndStatusOnly; + @Option(name = "--ignore-space-at-eol") - private boolean ignoreWsTrailing; + void ignoreSpaceAtEol(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreTrailingWhitespace.FACTORY); + } @Option(name = "--ignore-leading-space") - private boolean ignoreWsLeading; + void ignoreLeadingSpace(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreLeadingWhitespace.FACTORY); + } @Option(name = "-b", aliases = { "--ignore-space-change" }) - private boolean ignoreWsChange; + void ignoreSpaceChange(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreWhitespaceChange.FACTORY); + } @Option(name = "-w", aliases = { "--ignore-all-space" }) - private boolean ignoreWsAll; + void ignoreAllSpace(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreAllWhitespace.FACTORY); + } + + @Option(name = "-U", aliases = { "--unified" }, metaVar = "metaVar_linesOfContext") + void unified(int lines) { + diffFmt.setContext(lines); + } + + @Option(name = "--abbrev", metaVar = "n") + void abbrev(int lines) { + diffFmt.setAbbreviationLength(lines); + } - private DiffFormatter fmt = new DiffFormatter(); + @Option(name = "--full-index") + void abbrev(@SuppressWarnings("unused") boolean on) { + diffFmt.setAbbreviationLength(Constants.OBJECT_ID_STRING_LENGTH); + } + + // END -- Options shared with Log @Override protected void run() throws Exception { + List<DiffEntry> files = scan(); + + if (showNameAndStatusOnly) { + nameStatus(out, files); + out.flush(); + + } else { + diffFmt.setRepository(db); + diffFmt.format(files); + diffFmt.flush(); + } + } + + static void nameStatus(PrintWriter out, List<DiffEntry> files) { + for (DiffEntry ent : files) { + switch (ent.getChangeType()) { + case ADD: + out.println("A\t" + ent.getNewName()); + break; + case DELETE: + out.println("D\t" + ent.getOldName()); + break; + case MODIFY: + out.println("M\t" + ent.getNewName()); + break; + case COPY: + out.format("C%1$03d\t%2$s\t%3$s", ent.getScore(), // + ent.getOldName(), ent.getNewName()); + out.println(); + break; + case RENAME: + out.format("R%1$03d\t%2$s\t%3$s", ent.getScore(), // + ent.getOldName(), ent.getNewName()); + out.println(); + break; + } + } + } + + private List<DiffEntry> scan() throws IOException { final TreeWalk walk = new TreeWalk(db); walk.reset(); walk.setRecursive(true); @@ -104,65 +183,14 @@ class Diff extends TextBuiltin { walk.addTree(i); walk.setFilter(AndTreeFilter.create(TreeFilter.ANY_DIFF, pathFilter)); - while (walk.next()) - outputDiff(System.out, walk.getPathString(), - walk.getObjectId(0), walk.getFileMode(0), - walk.getObjectId(1), walk.getFileMode(1)); - } - - protected void outputDiff(PrintStream out, String path, - ObjectId id1, FileMode mode1, ObjectId id2, FileMode mode2) throws IOException { - String name1 = "a/" + path; - String name2 = "b/" + path; - out.println("diff --git " + name1 + " " + name2); - boolean isNew=false; - boolean isDelete=false; - if (id1.equals(ObjectId.zeroId())) { - out.println("new file mode " + mode2); - isNew=true; - } else if (id2.equals(ObjectId.zeroId())) { - out.println("deleted file mode " + mode1); - isDelete=true; - } else if (!mode1.equals(mode2)) { - out.println("old mode " + mode1); - out.println("new mode " + mode2); + List<DiffEntry> files = DiffEntry.scan(walk); + if (detectRenames) { + RenameDetector rd = new RenameDetector(db); + if (renameLimit != null) + rd.setRenameLimit(renameLimit.intValue()); + rd.addAll(files); + files = rd.compute(new TextProgressMonitor()); } - out.println("index " + id1.abbreviate(db, 7).name() - + ".." + id2.abbreviate(db, 7).name() - + (mode1.equals(mode2) ? " " + mode1 : "")); - out.println("--- " + (isNew ? "/dev/null" : name1)); - out.println("+++ " + (isDelete ? "/dev/null" : name2)); - - byte[] aRaw = getRawBytes(id1); - byte[] bRaw = getRawBytes(id2); - - if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { - out.println("Binary files differ"); - return; - } - - RawText a = getRawText(aRaw); - RawText b = getRawText(bRaw); - MyersDiff diff = new MyersDiff(a, b); - fmt.formatEdits(out, a, b, diff.getEdits()); - } - - private byte[] getRawBytes(ObjectId id) throws IOException { - if (id.equals(ObjectId.zeroId())) - return new byte[] {}; - return db.openBlob(id).getCachedBytes(); - } - - private RawText getRawText(byte[] raw) { - if (ignoreWsAll) - return new RawTextIgnoreAllWhitespace(raw); - else if (ignoreWsTrailing) - return new RawTextIgnoreTrailingWhitespace(raw); - else if (ignoreWsChange) - return new RawTextIgnoreWhitespaceChange(raw); - else if (ignoreWsLeading) - return new RawTextIgnoreLeadingWhitespace(raw); - else - return new RawText(raw); + return files; } } diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java index 9aa197e4ab..48a05915e4 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java @@ -45,22 +45,39 @@ package org.eclipse.jgit.pgm; +import java.io.BufferedOutputStream; +import java.io.IOException; import java.text.DateFormat; import java.text.MessageFormat; import java.text.SimpleDateFormat; import java.util.Collection; +import java.util.Collections; import java.util.Iterator; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.TimeZone; -import org.kohsuke.args4j.Option; +import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.diff.DiffFormatter; +import org.eclipse.jgit.diff.RawTextIgnoreAllWhitespace; +import org.eclipse.jgit.diff.RawTextIgnoreLeadingWhitespace; +import org.eclipse.jgit.diff.RawTextIgnoreTrailingWhitespace; +import org.eclipse.jgit.diff.RawTextIgnoreWhitespaceChange; +import org.eclipse.jgit.diff.RenameDetector; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.revwalk.FollowFilter; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.treewalk.TreeWalk; +import org.eclipse.jgit.treewalk.filter.AndTreeFilter; +import org.eclipse.jgit.treewalk.filter.TreeFilter; +import org.kohsuke.args4j.Option; @Command(common = true, usage = "usage_viewCommitHistory") class Log extends RevWalkTextBuiltin { @@ -68,11 +85,64 @@ class Log extends RevWalkTextBuiltin { private final DateFormat fmt; + private final DiffFormatter diffFmt = new DiffFormatter( // + new BufferedOutputStream(System.out)); + private Map<AnyObjectId, Set<Ref>> allRefsByPeeledObjectId; @Option(name="--decorate", usage="usage_showRefNamesMatchingCommits") private boolean decorate; + // BEGIN -- Options shared with Diff + @Option(name = "-p", usage = "usage_showPatch") + boolean showPatch; + + @Option(name = "-M", usage = "usage_detectRenames") + private boolean detectRenames; + + @Option(name = "-l", usage = "usage_renameLimit") + private Integer renameLimit; + + @Option(name = "--name-status", usage = "usage_nameStatus") + private boolean showNameAndStatusOnly; + + @Option(name = "--ignore-space-at-eol") + void ignoreSpaceAtEol(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreTrailingWhitespace.FACTORY); + } + + @Option(name = "--ignore-leading-space") + void ignoreLeadingSpace(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreLeadingWhitespace.FACTORY); + } + + @Option(name = "-b", aliases = { "--ignore-space-change" }) + void ignoreSpaceChange(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreWhitespaceChange.FACTORY); + } + + @Option(name = "-w", aliases = { "--ignore-all-space" }) + void ignoreAllSpace(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreAllWhitespace.FACTORY); + } + + @Option(name = "-U", aliases = { "--unified" }, metaVar = "metaVar_linesOfContext") + void unified(int lines) { + diffFmt.setContext(lines); + } + + @Option(name = "--abbrev", metaVar = "metaVar_n") + void abbrev(int lines) { + diffFmt.setAbbreviationLength(lines); + } + + @Option(name = "--full-index") + void abbrev(@SuppressWarnings("unused") boolean on) { + diffFmt.setAbbreviationLength(Constants.OBJECT_ID_STRING_LENGTH); + } + + // END -- Options shared with Diff + Log() { fmt = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy ZZZZZ", Locale.US); } @@ -120,6 +190,77 @@ class Log extends RevWalkTextBuiltin { } out.println(); + if (c.getParentCount() == 1 && (showNameAndStatusOnly || showPatch)) + showDiff(c); out.flush(); } + + private void showDiff(RevCommit c) throws IOException { + final TreeWalk tw = new TreeWalk(db); + tw.setRecursive(true); + tw.reset(); + tw.addTree(c.getParent(0).getTree()); + tw.addTree(c.getTree()); + tw.setFilter(AndTreeFilter.create(pathFilter, TreeFilter.ANY_DIFF)); + + List<DiffEntry> files = DiffEntry.scan(tw); + if (pathFilter instanceof FollowFilter && isAdd(files)) { + // The file we are following was added here, find where it + // came from so we can properly show the rename or copy, + // then continue digging backwards. + // + tw.reset(); + tw.addTree(c.getParent(0).getTree()); + tw.addTree(c.getTree()); + tw.setFilter(TreeFilter.ANY_DIFF); + files = updateFollowFilter(detectRenames(DiffEntry.scan(tw))); + + } else if (detectRenames) + files = detectRenames(files); + + if (showNameAndStatusOnly) { + Diff.nameStatus(out, files); + + } else { + diffFmt.setRepository(db); + diffFmt.format(files); + diffFmt.flush(); + } + out.println(); + } + + private List<DiffEntry> detectRenames(List<DiffEntry> files) + throws IOException { + RenameDetector rd = new RenameDetector(db); + if (renameLimit != null) + rd.setRenameLimit(renameLimit.intValue()); + rd.addAll(files); + return rd.compute(); + } + + private boolean isAdd(List<DiffEntry> files) { + String oldPath = ((FollowFilter) pathFilter).getPath(); + for (DiffEntry ent : files) { + if (ent.getChangeType() == ChangeType.ADD + && ent.getNewName().equals(oldPath)) + return true; + } + return false; + } + + private List<DiffEntry> updateFollowFilter(List<DiffEntry> files) { + String oldPath = ((FollowFilter) pathFilter).getPath(); + for (DiffEntry ent : files) { + if (isRename(ent) && ent.getNewName().equals(oldPath)) { + pathFilter = FollowFilter.create(ent.getOldName()); + return Collections.singletonList(ent); + } + } + return Collections.emptyList(); + } + + private static boolean isRename(DiffEntry ent) { + return ent.getChangeType() == ChangeType.RENAME + || ent.getChangeType() == ChangeType.COPY; + } } diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java index ea6eeb102c..bf3924b70b 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java @@ -53,6 +53,7 @@ import org.kohsuke.args4j.Option; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.pgm.opt.PathTreeFilterHandler; +import org.eclipse.jgit.revwalk.FollowFilter; import org.eclipse.jgit.revwalk.ObjectWalk; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevFlag; @@ -110,11 +111,16 @@ abstract class RevWalkTextBuiltin extends TextBuiltin { enableRevSort(RevSort.BOUNDARY, on); } + @Option(name = "--follow", metaVar = "metaVar_path") + void follow(final String path) { + pathFilter = FollowFilter.create(path); + } + @Argument(index = 0, metaVar = "metaVar_commitish") private final List<RevCommit> commits = new ArrayList<RevCommit>(); @Option(name = "--", metaVar = "metaVar_path", multiValued = true, handler = PathTreeFilterHandler.class) - private TreeFilter pathFilter = TreeFilter.ALL; + protected TreeFilter pathFilter = TreeFilter.ALL; private final List<RevFilter> revLimiter = new ArrayList<RevFilter>(); @@ -139,7 +145,9 @@ abstract class RevWalkTextBuiltin extends TextBuiltin { for (final RevSort s : sorting) walk.sort(s, true); - if (pathFilter != TreeFilter.ALL) + if (pathFilter instanceof FollowFilter) + walk.setTreeFilter(pathFilter); + else if (pathFilter != TreeFilter.ALL) walk.setTreeFilter(AndTreeFilter.create(pathFilter, TreeFilter.ANY_DIFF)); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterReflowTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterReflowTest.java index d9e50d20af..d9c95d7651 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterReflowTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterReflowTest.java @@ -68,7 +68,7 @@ public class DiffFormatterReflowTest extends TestCase { protected void setUp() throws Exception { super.setUp(); out = new ByteArrayOutputStream(); - fmt = new DiffFormatter(); + fmt = new DiffFormatter(out); } public void testNegativeContextFails() throws IOException { @@ -143,7 +143,7 @@ public class DiffFormatterReflowTest extends TestCase { } private void assertFormatted(final String name) throws IOException { - fmt.format(out, file, a, b); + fmt.format(file, a, b); final String exp = RawParseUtils.decode(readFile(name)); assertEquals(exp, RawParseUtils.decode(out.toByteArray())); } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterTest.java new file mode 100644 index 0000000000..996ee35a1e --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterTest.java @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.RepositoryTestCase; +import org.eclipse.jgit.patch.FileHeader; +import org.eclipse.jgit.patch.HunkHeader; +import org.eclipse.jgit.util.RawParseUtils; +import org.eclipse.jgit.util.io.DisabledOutputStream; + +public class DiffFormatterTest extends RepositoryTestCase { + private static final String DIFF = "diff --git "; + + private static final String REGULAR_FILE = "100644"; + + private static final String GITLINK = "160000"; + + private static final String PATH_A = "src/a"; + + private static final String PATH_B = "src/b"; + + private DiffFormatter df; + + private TestRepository testDb; + + @Override + public void setUp() throws Exception { + super.setUp(); + testDb = new TestRepository(db); + df = new DiffFormatter(DisabledOutputStream.INSTANCE); + df.setRepository(db); + } + + public void testCreateFileHeader_Modify() throws Exception { + ObjectId adId = blob("a\nd\n"); + ObjectId abcdId = blob("a\nb\nc\nd\n"); + + String diffHeader = makeDiffHeader(PATH_A, PATH_A, adId, abcdId); + + DiffEntry ad = DiffEntry.delete(PATH_A, adId); + DiffEntry abcd = DiffEntry.add(PATH_A, abcdId); + + DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0); + + FileHeader fh = df.createFileHeader(mod); + + assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer())); + assertEquals(0, fh.getStartOffset()); + assertEquals(fh.getBuffer().length, fh.getEndOffset()); + assertEquals(FileHeader.PatchType.UNIFIED, fh.getPatchType()); + + assertEquals(1, fh.getHunks().size()); + + HunkHeader hh = fh.getHunks().get(0); + assertEquals(1, hh.toEditList().size()); + + EditList el = hh.toEditList(); + assertEquals(1, el.size()); + + Edit e = el.get(0); + assertEquals(1, e.getBeginA()); + assertEquals(1, e.getEndA()); + assertEquals(1, e.getBeginB()); + assertEquals(3, e.getEndB()); + assertEquals(Edit.Type.INSERT, e.getType()); + } + + public void testCreateFileHeader_Binary() throws Exception { + ObjectId adId = blob("a\nd\n"); + ObjectId binId = blob("a\nb\nc\n\0\0\0\0d\n"); + + String diffHeader = makeDiffHeader(PATH_A, PATH_B, adId, binId) + + "Binary files differ\n"; + + DiffEntry ad = DiffEntry.delete(PATH_A, adId); + DiffEntry abcd = DiffEntry.add(PATH_B, binId); + + DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0); + + FileHeader fh = df.createFileHeader(mod); + + assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer())); + assertEquals(FileHeader.PatchType.BINARY, fh.getPatchType()); + + assertEquals(1, fh.getHunks().size()); + + HunkHeader hh = fh.getHunks().get(0); + assertEquals(0, hh.toEditList().size()); + } + + public void testCreateFileHeader_GitLink() throws Exception { + ObjectId aId = blob("a\n"); + ObjectId bId = blob("b\n"); + + String diffHeader = makeDiffHeaderModeChange(PATH_A, PATH_A, aId, bId, + GITLINK, REGULAR_FILE) + + "-Subproject commit " + aId.name() + "\n"; + + DiffEntry ad = DiffEntry.delete(PATH_A, aId); + ad.oldMode = FileMode.GITLINK; + DiffEntry abcd = DiffEntry.add(PATH_A, bId); + + DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0); + + FileHeader fh = df.createFileHeader(mod); + + assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer())); + + assertEquals(1, fh.getHunks().size()); + + HunkHeader hh = fh.getHunks().get(0); + assertEquals(0, hh.toEditList().size()); + } + + private String makeDiffHeader(String pathA, String pathB, ObjectId aId, + ObjectId bId) { + String a = aId.abbreviate(db).name(); + String b = bId.abbreviate(db).name(); + return DIFF + "a/" + pathA + " " + "b/" + pathB + "\n" + // + "index " + a + ".." + b + " " + REGULAR_FILE + "\n" + // + "--- a/" + pathA + "\n" + // + "+++ b/" + pathB + "\n"; + } + + private String makeDiffHeaderModeChange(String pathA, String pathB, + ObjectId aId, ObjectId bId, String modeA, String modeB) { + String a = aId.abbreviate(db).name(); + String b = bId.abbreviate(db).name(); + return DIFF + "a/" + pathA + " " + "b/" + pathB + "\n" + // + "old mode " + modeA + "\n" + // + "new mode " + modeB + "\n" + // + "index " + a + ".." + b + "\n" + // + "--- a/" + pathA + "\n" + // + "+++ b/" + pathB + "\n"; + } + + private ObjectId blob(String content) throws Exception { + return testDb.blob(content).copy(); + } + +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java new file mode 100644 index 0000000000..c32c78668f --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java @@ -0,0 +1,446 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.util.List; + +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.RepositoryTestCase; + +public class RenameDetectorTest extends RepositoryTestCase { + private static final String PATH_A = "src/A"; + private static final String PATH_B = "src/B"; + private static final String PATH_H = "src/H"; + private static final String PATH_Q = "src/Q"; + + private RenameDetector rd; + + private TestRepository testDb; + + @Override + public void setUp() throws Exception { + super.setUp(); + testDb = new TestRepository(db); + rd = new RenameDetector(db); + } + + public void testExactRename_OneRename() throws Exception { + ObjectId foo = blob("foo"); + + DiffEntry a = DiffEntry.add(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_Q, foo); + + rd.add(a); + rd.add(b); + + List<DiffEntry> entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 100, entries.get(0)); + } + + public void testExactRename_OneRenameOneModify() throws Exception { + ObjectId foo = blob("foo"); + ObjectId bar = blob("bar"); + + DiffEntry a = DiffEntry.add(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_Q, foo); + + DiffEntry c = DiffEntry.modify(PATH_H); + c.newId = c.oldId = AbbreviatedObjectId.fromObjectId(bar); + + rd.add(a); + rd.add(b); + rd.add(c); + + List<DiffEntry> entries = rd.compute(); + assertEquals(2, entries.size()); + assertRename(b, a, 100, entries.get(0)); + assertSame(c, entries.get(1)); + } + + public void testExactRename_ManyRenames() throws Exception { + ObjectId foo = blob("foo"); + ObjectId bar = blob("bar"); + + DiffEntry a = DiffEntry.add(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_Q, foo); + + DiffEntry c = DiffEntry.add(PATH_H, bar); + DiffEntry d = DiffEntry.delete(PATH_B, bar); + + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); + + List<DiffEntry> entries = rd.compute(); + assertEquals(2, entries.size()); + assertRename(b, a, 100, entries.get(0)); + assertRename(d, c, 100, entries.get(1)); + } + + public void testExactRename_MultipleIdenticalDeletes() throws Exception { + ObjectId foo = blob("foo"); + + DiffEntry a = DiffEntry.delete(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_B, foo); + + DiffEntry c = DiffEntry.delete(PATH_H, foo); + DiffEntry d = DiffEntry.add(PATH_Q, foo); + + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); + + // Pairs the add with the first delete added + List<DiffEntry> entries = rd.compute(); + assertEquals(3, entries.size()); + assertEquals(b, entries.get(0)); + assertEquals(c, entries.get(1)); + assertRename(a, d, 100, entries.get(2)); + } + + public void testExactRename_PathBreaksTie() throws Exception { + ObjectId foo = blob("foo"); + + DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo); + DiffEntry b = DiffEntry.delete("src/com/foo/b.java", foo); + + DiffEntry c = DiffEntry.add("c.txt", foo); + DiffEntry d = DiffEntry.delete("d.txt", foo); + + // Add out of order to avoid first-match succeeding + rd.add(a); + rd.add(d); + rd.add(b); + rd.add(c); + + List<DiffEntry> entries = rd.compute(); + assertEquals(2, entries.size()); + assertRename(d, c, 100, entries.get(0)); + assertRename(b, a, 100, entries.get(1)); + } + + public void testExactRename_OneDeleteManyAdds() throws Exception { + ObjectId foo = blob("foo"); + + DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo); + DiffEntry b = DiffEntry.add("src/com/foo/b.java", foo); + DiffEntry c = DiffEntry.add("c.txt", foo); + + DiffEntry d = DiffEntry.delete("d.txt", foo); + + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); + + List<DiffEntry> entries = rd.compute(); + assertEquals(3, entries.size()); + assertRename(d, c, 100, entries.get(0)); + assertCopy(d, a, 100, entries.get(1)); + assertCopy(d, b, 100, entries.get(2)); + } + + public void testInexactRename_OnePair() throws Exception { + ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); + ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List<DiffEntry> entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 66, entries.get(0)); + } + + public void testInexactRename_OneRenameTwoUnrelatedFiles() throws Exception { + ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); + ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + ObjectId cId = blob("some\nsort\nof\ntext\n"); + ObjectId dId = blob("completely\nunrelated\ntext\n"); + DiffEntry c = DiffEntry.add(PATH_B, cId); + DiffEntry d = DiffEntry.delete(PATH_H, dId); + + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); + + List<DiffEntry> entries = rd.compute(); + assertEquals(3, entries.size()); + assertRename(b, a, 66, entries.get(0)); + assertSame(c, entries.get(1)); + assertSame(d, entries.get(2)); + } + + public void testInexactRename_LastByteDifferent() throws Exception { + ObjectId aId = blob("foo\nbar\na"); + ObjectId bId = blob("foo\nbar\nb"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List<DiffEntry> entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 88, entries.get(0)); + } + + public void testInexactRename_NewlinesOnly() throws Exception { + ObjectId aId = blob("\n\n\n"); + ObjectId bId = blob("\n\n\n\n"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List<DiffEntry> entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 74, entries.get(0)); + } + + public void testInexactRenames_OnePair2() throws Exception { + ObjectId aId = blob("ab\nab\nab\nac\nad\nae\n"); + ObjectId bId = blob("ac\nab\nab\nab\naa\na0\na1\n"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + rd.setRenameScore(50); + + List<DiffEntry> entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 57, entries.get(0)); + } + + public void testNoRenames_SingleByteFiles() throws Exception { + ObjectId aId = blob("a"); + ObjectId bId = blob("b"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List<DiffEntry> entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + public void testNoRenames_EmptyFile1() throws Exception { + ObjectId aId = blob(""); + DiffEntry a = DiffEntry.add(PATH_A, aId); + + rd.add(a); + + List<DiffEntry> entries = rd.compute(); + assertEquals(1, entries.size()); + assertSame(a, entries.get(0)); + } + + public void testNoRenames_EmptyFile2() throws Exception { + ObjectId aId = blob(""); + ObjectId bId = blob("blah"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List<DiffEntry> entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + public void testNoRenames_SymlinkAndFile() throws Exception { + ObjectId aId = blob("src/dest"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, aId); + b.oldMode = FileMode.SYMLINK; + + rd.add(a); + rd.add(b); + + List<DiffEntry> entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + public void testNoRenames_GitlinkAndFile() throws Exception { + ObjectId aId = blob("src/dest"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, aId); + b.oldMode = FileMode.GITLINK; + + rd.add(a); + rd.add(b); + + List<DiffEntry> entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + public void testNoRenames_SymlinkAndFileSamePath() throws Exception { + ObjectId aId = blob("src/dest"); + + DiffEntry a = DiffEntry.delete(PATH_A, aId); + DiffEntry b = DiffEntry.add(PATH_A, aId); + a.oldMode = FileMode.SYMLINK; + + rd.add(a); + rd.add(b); + + // Deletes should be first + List<DiffEntry> entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + public void testSetRenameScore_IllegalArgs() throws Exception { + try { + rd.setRenameScore(-1); + fail(); + } catch (IllegalArgumentException e) { + // pass + } + + try { + rd.setRenameScore(101); + fail(); + } catch (IllegalArgumentException e) { + // pass + } + } + + public void testRenameLimit() throws Exception { + ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); + ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_B, bId); + + ObjectId cId = blob("a\nb\nc\nd\n"); + ObjectId dId = blob("a\nb\nc\n"); + DiffEntry c = DiffEntry.add(PATH_H, cId); + DiffEntry d = DiffEntry.delete(PATH_Q, dId); + + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); + + rd.setRenameLimit(1); + + assertTrue(rd.isOverRenameLimit()); + + List<DiffEntry> entries = rd.compute(); + assertEquals(4, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + assertSame(c, entries.get(2)); + assertSame(d, entries.get(3)); + } + + private ObjectId blob(String content) throws Exception { + return testDb.blob(content).copy(); + } + + private static void assertRename(DiffEntry o, DiffEntry n, int score, + DiffEntry rename) { + assertEquals(ChangeType.RENAME, rename.getChangeType()); + + assertEquals(o.getOldName(), rename.getOldName()); + assertEquals(n.getNewName(), rename.getNewName()); + + assertEquals(o.getOldMode(), rename.getOldMode()); + assertEquals(n.getNewMode(), rename.getNewMode()); + + assertEquals(o.getOldId(), rename.getOldId()); + assertEquals(n.getNewId(), rename.getNewId()); + + assertEquals(score, rename.getScore()); + } + + private static void assertCopy(DiffEntry o, DiffEntry n, int score, + DiffEntry copy) { + assertEquals(ChangeType.COPY, copy.getChangeType()); + + assertEquals(o.getOldName(), copy.getOldName()); + assertEquals(n.getNewName(), copy.getNewName()); + + assertEquals(o.getOldMode(), copy.getOldMode()); + assertEquals(n.getNewMode(), copy.getNewMode()); + + assertEquals(o.getOldId(), copy.getOldId()); + assertEquals(n.getNewId(), copy.getNewId()); + + assertEquals(score, copy.getScore()); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java new file mode 100644 index 0000000000..d6915eb872 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import junit.framework.TestCase; + +import org.eclipse.jgit.lib.Constants; + +public class SimilarityIndexTest extends TestCase { + public void testIndexing() { + SimilarityIndex si = hash("" // + + "A\n" // + + "B\n" // + + "D\n" // + + "B\n" // + ); + + int key_A = keyFor("A\n"); + int key_B = keyFor("B\n"); + int key_D = keyFor("D\n"); + assertTrue(key_A != key_B && key_A != key_D && key_B != key_D); + + assertEquals(3, si.size()); + assertEquals(2, si.count(si.findIndex(key_A))); + assertEquals(4, si.count(si.findIndex(key_B))); + assertEquals(2, si.count(si.findIndex(key_D))); + } + + public void testCommonScore_SameFiles() { + String text = "" // + + "A\n" // + + "B\n" // + + "D\n" // + + "B\n"; + SimilarityIndex src = hash(text); + SimilarityIndex dst = hash(text); + assertEquals(8, src.common(dst)); + assertEquals(8, dst.common(src)); + + assertEquals(100, src.score(dst, 100)); + assertEquals(100, dst.score(src, 100)); + } + + public void testCommonScore_EmptyFiles() { + SimilarityIndex src = hash(""); + SimilarityIndex dst = hash(""); + assertEquals(0, src.common(dst)); + assertEquals(0, dst.common(src)); + } + + public void testCommonScore_TotallyDifferentFiles() { + SimilarityIndex src = hash("A\n"); + SimilarityIndex dst = hash("D\n"); + assertEquals(0, src.common(dst)); + assertEquals(0, dst.common(src)); + } + + public void testCommonScore_SimiliarBy75() { + SimilarityIndex src = hash("A\nB\nC\nD\n"); + SimilarityIndex dst = hash("A\nB\nC\nQ\n"); + assertEquals(6, src.common(dst)); + assertEquals(6, dst.common(src)); + + assertEquals(75, src.score(dst, 100)); + assertEquals(75, dst.score(src, 100)); + } + + private static SimilarityIndex hash(String text) { + SimilarityIndex src = new SimilarityIndex() { + @Override + void hash(byte[] raw, int ptr, final int end) { + while (ptr < end) { + int hash = raw[ptr] & 0xff; + int start = ptr; + do { + int c = raw[ptr++] & 0xff; + if (c == '\n') + break; + } while (ptr < end && ptr - start < 64); + add(hash, ptr - start); + } + } + }; + byte[] raw = Constants.encode(text); + src.setFileSize(raw.length); + src.hash(raw, 0, raw.length); + src.sort(); + return src; + } + + private static int keyFor(String line) { + SimilarityIndex si = hash(line); + assertEquals("single line scored", 1, si.size()); + return si.key(0); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/FileHeaderTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/FileHeaderTest.java index 8d9e302a82..17e99779cf 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/FileHeaderTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/FileHeaderTest.java @@ -45,6 +45,7 @@ package org.eclipse.jgit.patch; import junit.framework.TestCase; +import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectId; @@ -153,7 +154,7 @@ public class FileHeaderTest extends TestCase { assertParse(fh); assertEquals("/dev/null", fh.getOldName()); - assertSame(FileHeader.DEV_NULL, fh.getOldName()); + assertSame(DiffEntry.DEV_NULL, fh.getOldName()); assertEquals("\u00c5ngstr\u00f6m", fh.getNewName()); assertSame(FileHeader.ChangeType.ADD, fh.getChangeType()); @@ -179,7 +180,7 @@ public class FileHeaderTest extends TestCase { assertEquals("\u00c5ngstr\u00f6m", fh.getOldName()); assertEquals("/dev/null", fh.getNewName()); - assertSame(FileHeader.DEV_NULL, fh.getNewName()); + assertSame(DiffEntry.DEV_NULL, fh.getNewName()); assertSame(FileHeader.ChangeType.DELETE, fh.getChangeType()); assertSame(FileHeader.PatchType.UNIFIED, fh.getPatchType()); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/PatchCcTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/PatchCcTest.java index e97d373d9a..1d879cba67 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/PatchCcTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/PatchCcTest.java @@ -46,10 +46,11 @@ package org.eclipse.jgit.patch; import java.io.IOException; import java.io.InputStream; -import org.eclipse.jgit.lib.FileMode; - import junit.framework.TestCase; +import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.lib.FileMode; + public class PatchCcTest extends TestCase { public void testParse_OneFileCc() throws IOException { final Patch p = parseTestPatchFile(); @@ -113,7 +114,7 @@ public class PatchCcTest extends TestCase { final CombinedFileHeader cfh = (CombinedFileHeader) p.getFiles().get(0); - assertSame(FileHeader.DEV_NULL, cfh.getOldName()); + assertSame(DiffEntry.DEV_NULL, cfh.getOldName()); assertEquals("d", cfh.getNewName()); assertEquals(187, cfh.startOffset); @@ -168,7 +169,7 @@ public class PatchCcTest extends TestCase { final CombinedFileHeader cfh = (CombinedFileHeader) p.getFiles().get(0); assertEquals("a", cfh.getOldName()); - assertSame(FileHeader.DEV_NULL, cfh.getNewName()); + assertSame(DiffEntry.DEV_NULL, cfh.getNewName()); assertEquals(187, cfh.startOffset); diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties index e4a603cbba..ca1832e571 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties @@ -6,6 +6,7 @@ JRELacksMD5Implementation=JRE lacks MD5 implementation URINotSupported=URI not supported: {0} URLNotFound={0} not found aNewObjectIdIsRequired=A NewObjectId is required. +abbreviationLengthMustBeNonNegative=Abbreviation length must not be negative. advertisementCameBefore=advertisement of {0}^{} came before {1} advertisementOfCameBefore=advertisement of {0}^{} came before {1} amazonS3ActionFailed={0} of '{1}' failed: {2} {3} @@ -299,7 +300,11 @@ remoteDoesNotHaveSpec=Remote does not have {0} available for fetch. remoteDoesNotSupportSmartHTTPPush=remote does not support smart HTTP push remoteHungUpUnexpectedly=remote hung up unexpectedly remoteNameCantBeNull=Remote name can't be null. +renamesAlreadyFound=Renames have already been found. +renamesFindingByContent=Finding renames by content similarity +renamesFindingExact=Finding exact renames repositoryAlreadyExists=Repository already exists: {0} +repositoryIsRequired=Repository is required. repositoryNotFound=repository not found: {0} repositoryState_applyMailbox=Apply mailbox repositoryState_bisecting=Bisecting @@ -317,6 +322,7 @@ shortCompressedStreamAt=Short compressed stream at {0} shortReadOfBlock=Short read of block. shortReadOfOptionalDIRCExtensionExpectedAnotherBytes=Short read of optional DIRC extension {0}; expected another {1} bytes within the section. shortSkipOfBlock=Short skip of block. +similarityScoreMustBeWithinBounds=Similarity score must be between 0 and 100. smartHTTPPushDisabled=smart HTTP push disabled sourceDestinationMustMatch=Source/Destination must match. sourceIsNotAWildcard=Source is not a wildcard. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java index 94cf1e48bd..71e5e4a899 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java @@ -66,6 +66,7 @@ public class JGitText extends TranslationBundle { /***/ public String URINotSupported; /***/ public String URLNotFound; /***/ public String aNewObjectIdIsRequired; + /***/ public String abbreviationLengthMustBeNonNegative; /***/ public String advertisementCameBefore; /***/ public String advertisementOfCameBefore; /***/ public String amazonS3ActionFailed; @@ -358,7 +359,11 @@ public class JGitText extends TranslationBundle { /***/ public String remoteDoesNotSupportSmartHTTPPush; /***/ public String remoteHungUpUnexpectedly; /***/ public String remoteNameCantBeNull; + /***/ public String renamesAlreadyFound; + /***/ public String renamesFindingByContent; + /***/ public String renamesFindingExact; /***/ public String repositoryAlreadyExists; + /***/ public String repositoryIsRequired; /***/ public String repositoryNotFound; /***/ public String repositoryState_applyMailbox; /***/ public String repositoryState_bisecting; @@ -376,6 +381,7 @@ public class JGitText extends TranslationBundle { /***/ public String shortReadOfBlock; /***/ public String shortReadOfOptionalDIRCExtensionExpectedAnotherBytes; /***/ public String shortSkipOfBlock; + /***/ public String similarityScoreMustBeWithinBounds; /***/ public String smartHTTPPushDisabled; /***/ public String sourceDestinationMustMatch; /***/ public String sourceIsNotAWildcard; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffConfig.java new file mode 100644 index 0000000000..91b7467aee --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffConfig.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import org.eclipse.jgit.lib.Config; +import org.eclipse.jgit.lib.Config.SectionParser; + +/** Keeps track of diff related configuration options. */ +public class DiffConfig { + /** Key for {@link Config#get(SectionParser)}. */ + public static final Config.SectionParser<DiffConfig> KEY = new SectionParser<DiffConfig>() { + public DiffConfig parse(final Config cfg) { + return new DiffConfig(cfg); + } + }; + + private final int renameLimit; + + private DiffConfig(final Config rc) { + renameLimit = rc.getInt("diff", "renamelimit", 200); + } + + /** @return limit on number of paths to perform inexact rename detection. */ + public int getRenameLimit() { + return renameLimit; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java new file mode 100644 index 0000000000..304e4cff39 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java @@ -0,0 +1,345 @@ +/* + * Copyright (C) 2008-2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.MutableObjectId; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.treewalk.TreeWalk; + +/** A value class representing a change to a file */ +public class DiffEntry { + private static final AbbreviatedObjectId A_ZERO = AbbreviatedObjectId + .fromObjectId(ObjectId.zeroId()); + + /** Magical file name used for file adds or deletes. */ + public static final String DEV_NULL = "/dev/null"; + + /** General type of change a single file-level patch describes. */ + public static enum ChangeType { + /** Add a new file to the project */ + ADD, + + /** Modify an existing file in the project (content and/or mode) */ + MODIFY, + + /** Delete an existing file from the project */ + DELETE, + + /** Rename an existing file to a new location */ + RENAME, + + /** Copy an existing file to a new location, keeping the original */ + COPY; + } + + /** + * Create an empty DiffEntry + */ + protected DiffEntry(){ + // reduce the visibility of the default constructor + } + + /** + * Convert the TreeWalk into DiffEntry headers. + * + * @param walk + * the TreeWalk to walk through. Must have exactly two trees. + * @return headers describing the changed files. + * @throws IOException + * the repository cannot be accessed. + */ + public static List<DiffEntry> scan(TreeWalk walk) throws IOException { + List<DiffEntry> r = new ArrayList<DiffEntry>(); + MutableObjectId idBuf = new MutableObjectId(); + while (walk.next()) { + DiffEntry entry = new DiffEntry(); + + walk.getObjectId(idBuf, 0); + entry.oldId = AbbreviatedObjectId.fromObjectId(idBuf); + + walk.getObjectId(idBuf, 1); + entry.newId = AbbreviatedObjectId.fromObjectId(idBuf); + + entry.oldMode = walk.getFileMode(0); + entry.newMode = walk.getFileMode(1); + entry.newName = entry.oldName = walk.getPathString(); + + if (entry.oldMode == FileMode.MISSING) { + entry.oldName = DiffEntry.DEV_NULL; + entry.changeType = ChangeType.ADD; + r.add(entry); + + } else if (entry.newMode == FileMode.MISSING) { + entry.newName = DiffEntry.DEV_NULL; + entry.changeType = ChangeType.DELETE; + r.add(entry); + + } else { + entry.changeType = ChangeType.MODIFY; + if (RenameDetector.sameType(entry.oldMode, entry.newMode)) + r.add(entry); + else + r.addAll(breakModify(entry)); + } + } + return r; + } + + static DiffEntry add(String path, AnyObjectId id) { + DiffEntry e = new DiffEntry(); + e.oldId = A_ZERO; + e.oldMode = FileMode.MISSING; + e.oldName = DEV_NULL; + + e.newId = AbbreviatedObjectId.fromObjectId(id); + e.newMode = FileMode.REGULAR_FILE; + e.newName = path; + e.changeType = ChangeType.ADD; + return e; + } + + static DiffEntry delete(String path, AnyObjectId id) { + DiffEntry e = new DiffEntry(); + e.oldId = AbbreviatedObjectId.fromObjectId(id); + e.oldMode = FileMode.REGULAR_FILE; + e.oldName = path; + + e.newId = A_ZERO; + e.newMode = FileMode.MISSING; + e.newName = DEV_NULL; + e.changeType = ChangeType.DELETE; + return e; + } + + static DiffEntry modify(String path) { + DiffEntry e = new DiffEntry(); + e.oldMode = FileMode.REGULAR_FILE; + e.oldName = path; + + e.newMode = FileMode.REGULAR_FILE; + e.newName = path; + e.changeType = ChangeType.MODIFY; + return e; + } + + static List<DiffEntry> breakModify(DiffEntry entry) { + DiffEntry del = new DiffEntry(); + del.oldId = entry.getOldId(); + del.oldMode = entry.getOldMode(); + del.oldName = entry.getOldName(); + + del.newId = A_ZERO; + del.newMode = FileMode.MISSING; + del.newName = DiffEntry.DEV_NULL; + del.changeType = ChangeType.DELETE; + + DiffEntry add = new DiffEntry(); + add.oldId = A_ZERO; + add.oldMode = FileMode.MISSING; + add.oldName = DiffEntry.DEV_NULL; + + add.newId = entry.getNewId(); + add.newMode = entry.getNewMode(); + add.newName = entry.getNewName(); + add.changeType = ChangeType.ADD; + return Arrays.asList(del, add); + } + + static DiffEntry pair(ChangeType changeType, DiffEntry src, DiffEntry dst, + int score) { + DiffEntry r = new DiffEntry(); + + r.oldId = src.oldId; + r.oldMode = src.oldMode; + r.oldName = src.oldName; + + r.newId = dst.newId; + r.newMode = dst.newMode; + r.newName = dst.newName; + + r.changeType = changeType; + r.score = score; + + return r; + } + + /** File name of the old (pre-image). */ + protected String oldName; + + /** File name of the new (post-image). */ + protected String newName; + + /** Old mode of the file, if described by the patch, else null. */ + protected FileMode oldMode; + + /** New mode of the file, if described by the patch, else null. */ + protected FileMode newMode; + + /** General type of change indicated by the patch. */ + protected ChangeType changeType; + + /** Similarity score if {@link #changeType} is a copy or rename. */ + protected int score; + + /** ObjectId listed on the index line for the old (pre-image) */ + protected AbbreviatedObjectId oldId; + + /** ObjectId listed on the index line for the new (post-image) */ + protected AbbreviatedObjectId newId; + + /** + * Get the old name associated with this file. + * <p> + * The meaning of the old name can differ depending on the semantic meaning + * of this patch: + * <ul> + * <li><i>file add</i>: always <code>/dev/null</code></li> + * <li><i>file modify</i>: always {@link #getNewName()}</li> + * <li><i>file delete</i>: always the file being deleted</li> + * <li><i>file copy</i>: source file the copy originates from</li> + * <li><i>file rename</i>: source file the rename originates from</li> + * </ul> + * + * @return old name for this file. + */ + public String getOldName() { + return oldName; + } + + /** + * Get the new name associated with this file. + * <p> + * The meaning of the new name can differ depending on the semantic meaning + * of this patch: + * <ul> + * <li><i>file add</i>: always the file being created</li> + * <li><i>file modify</i>: always {@link #getOldName()}</li> + * <li><i>file delete</i>: always <code>/dev/null</code></li> + * <li><i>file copy</i>: destination file the copy ends up at</li> + * <li><i>file rename</i>: destination file the rename ends up at/li> + * </ul> + * + * @return new name for this file. + */ + public String getNewName() { + return newName; + } + + /** @return the old file mode, if described in the patch */ + public FileMode getOldMode() { + return oldMode; + } + + /** @return the new file mode, if described in the patch */ + public FileMode getNewMode() { + return newMode; + } + + /** @return the type of change this patch makes on {@link #getNewName()} */ + public ChangeType getChangeType() { + return changeType; + } + + /** + * @return similarity score between {@link #getOldName()} and + * {@link #getNewName()} if {@link #getChangeType()} is + * {@link ChangeType#COPY} or {@link ChangeType#RENAME}. + */ + public int getScore() { + return score; + } + + /** + * Get the old object id from the <code>index</code>. + * + * @return the object id; null if there is no index line + */ + public AbbreviatedObjectId getOldId() { + return oldId; + } + + /** + * Get the new object id from the <code>index</code>. + * + * @return the object id; null if there is no index line + */ + public AbbreviatedObjectId getNewId() { + return newId; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("DiffEntry["); + buf.append(changeType); + buf.append(" "); + switch (changeType) { + case ADD: + buf.append(newName); + break; + case COPY: + buf.append(oldName + "->" + newName); + break; + case DELETE: + buf.append(oldName); + break; + case MODIFY: + buf.append(oldName); + break; + case RENAME: + buf.append(oldName + "->" + newName); + break; + } + buf.append("]"); + return buf.toString(); + } +}
\ No newline at end of file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index c40d3b7000..cdcc5e63e4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -44,14 +44,28 @@ package org.eclipse.jgit.diff; +import static org.eclipse.jgit.lib.Constants.encode; import static org.eclipse.jgit.lib.Constants.encodeASCII; +import static org.eclipse.jgit.lib.FileMode.GITLINK; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.List; import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.patch.FileHeader; +import org.eclipse.jgit.patch.HunkHeader; +import org.eclipse.jgit.patch.FileHeader.PatchType; +import org.eclipse.jgit.util.QuotedString; +import org.eclipse.jgit.util.io.DisabledOutputStream; /** * Format an {@link EditList} as a Git style unified patch script. @@ -59,11 +73,43 @@ import org.eclipse.jgit.patch.FileHeader; public class DiffFormatter { private static final byte[] noNewLine = encodeASCII("\\ No newline at end of file\n"); + private final OutputStream out; + + private Repository db; + private int context; - /** Create a new formatter with a default level of context. */ - public DiffFormatter() { + private int abbreviationLength; + + private RawText.Factory rawTextFactory = RawText.FACTORY; + + /** + * Create a new formatter with a default level of context. + * + * @param out + * the stream the formatter will write line data to. This stream + * should have buffering arranged by the caller, as many small + * writes are performed to it. + */ + public DiffFormatter(OutputStream out) { + this.out = out; setContext(3); + setAbbreviationLength(8); + } + + /** @return the stream we are outputting data to. */ + protected OutputStream getOutputStream() { + return out; + } + + /** + * Set the repository the formatter can load object contents from. + * + * @param repository + * source repository holding referenced objects. + */ + public void setRepository(Repository repository) { + db = repository; } /** @@ -76,19 +122,217 @@ public class DiffFormatter { */ public void setContext(final int lineCount) { if (lineCount < 0) - throw new IllegalArgumentException(JGitText.get().contextMustBeNonNegative); + throw new IllegalArgumentException( + JGitText.get().contextMustBeNonNegative); context = lineCount; } /** + * Change the number of digits to show in an ObjectId. + * + * @param count + * number of digits to show in an ObjectId. + */ + public void setAbbreviationLength(final int count) { + if (count < 0) + throw new IllegalArgumentException( + JGitText.get().abbreviationLengthMustBeNonNegative); + abbreviationLength = count; + } + + /** + * Set the helper that constructs difference output. + * + * @param type + * the factory to create different output. Different types of + * factories can produce different whitespace behavior, for + * example. + * @see RawText#FACTORY + * @see RawTextIgnoreAllWhitespace#FACTORY + * @see RawTextIgnoreLeadingWhitespace#FACTORY + * @see RawTextIgnoreTrailingWhitespace#FACTORY + * @see RawTextIgnoreWhitespaceChange#FACTORY + */ + public void setRawTextFactory(RawText.Factory type) { + rawTextFactory = type; + } + + /** + * Flush the underlying output stream of this formatter. + * + * @throws IOException + * the stream's own flush method threw an exception. + */ + public void flush() throws IOException { + out.flush(); + } + + /** + * Format a patch script from a list of difference entries. + * + * @param entries + * entries describing the affected files. + * @throws IOException + * a file's content cannot be read, or the output stream cannot + * be written to. + */ + public void format(List<? extends DiffEntry> entries) throws IOException { + for (DiffEntry ent : entries) + format(ent); + } + + /** + * Format a patch script for one file entry. + * + * @param ent + * the entry to be formatted. + * @throws IOException + * a file's content cannot be read, or the output stream cannot + * be written to. + */ + public void format(DiffEntry ent) throws IOException { + writeDiffHeader(out, ent); + + if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) { + writeGitLinkDiffText(out, ent); + } else { + byte[] aRaw = open(ent.getOldMode(), ent.getOldId()); + byte[] bRaw = open(ent.getNewMode(), ent.getNewId()); + + if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { + out.write(encodeASCII("Binary files differ\n")); + + } else { + RawText a = rawTextFactory.create(aRaw); + RawText b = rawTextFactory.create(bRaw); + formatEdits(a, b, new MyersDiff(a, b).getEdits()); + } + } + } + + private void writeGitLinkDiffText(OutputStream o, DiffEntry ent) + throws IOException { + if (ent.getOldMode() == GITLINK) { + o.write(encodeASCII("-Subproject commit " + ent.getOldId().name() + + "\n")); + } + if (ent.getNewMode() == GITLINK) { + o.write(encodeASCII("+Subproject commit " + ent.getNewId().name() + + "\n")); + } + } + + private void writeDiffHeader(OutputStream o, DiffEntry ent) + throws IOException { + String oldName = quotePath("a/" + ent.getOldName()); + String newName = quotePath("b/" + ent.getNewName()); + o.write(encode("diff --git " + oldName + " " + newName + "\n")); + + switch (ent.getChangeType()) { + case ADD: + o.write(encodeASCII("new file mode ")); + ent.getNewMode().copyTo(o); + o.write('\n'); + break; + + case DELETE: + o.write(encodeASCII("deleted file mode ")); + ent.getOldMode().copyTo(o); + o.write('\n'); + break; + + case RENAME: + o.write(encodeASCII("similarity index " + ent.getScore() + "%")); + o.write('\n'); + + o.write(encode("rename from " + quotePath(ent.getOldName()))); + o.write('\n'); + + o.write(encode("rename to " + quotePath(ent.getNewName()))); + o.write('\n'); + break; + + case COPY: + o.write(encodeASCII("similarity index " + ent.getScore() + "%")); + o.write('\n'); + + o.write(encode("copy from " + quotePath(ent.getOldName()))); + o.write('\n'); + + o.write(encode("copy to " + quotePath(ent.getNewName()))); + o.write('\n'); + + if (!ent.getOldMode().equals(ent.getNewMode())) { + o.write(encodeASCII("new file mode ")); + ent.getNewMode().copyTo(o); + o.write('\n'); + } + break; + } + + switch (ent.getChangeType()) { + case RENAME: + case MODIFY: + if (!ent.getOldMode().equals(ent.getNewMode())) { + o.write(encodeASCII("old mode ")); + ent.getOldMode().copyTo(o); + o.write('\n'); + + o.write(encodeASCII("new mode ")); + ent.getNewMode().copyTo(o); + o.write('\n'); + } + } + + o.write(encodeASCII("index " // + + format(ent.getOldId()) // + + ".." // + + format(ent.getNewId()))); + if (ent.getOldMode().equals(ent.getNewMode())) { + o.write(' '); + ent.getNewMode().copyTo(o); + } + o.write('\n'); + o.write(encode("--- " + oldName + '\n')); + o.write(encode("+++ " + newName + '\n')); + } + + private String format(AbbreviatedObjectId oldId) { + if (oldId.isComplete() && db != null) + oldId = oldId.toObjectId().abbreviate(db, abbreviationLength); + return oldId.name(); + } + + private static String quotePath(String name) { + String q = QuotedString.GIT_PATH.quote(name); + return ('"' + name + '"').equals(q) ? name : q; + } + + private byte[] open(FileMode mode, AbbreviatedObjectId id) + throws IOException { + if (mode == FileMode.MISSING) + return new byte[] {}; + + if (mode.getObjectType() != Constants.OBJ_BLOB) + return new byte[] {}; + + if (db == null) + throw new IllegalStateException(JGitText.get().repositoryIsRequired); + if (id.isComplete()) { + ObjectLoader ldr = db.openObject(id.toObjectId()); + return ldr.getCachedBytes(); + } + + return new byte[] {}; + } + + /** * Format a patch script, reusing a previously parsed FileHeader. * <p> * This formatter is primarily useful for editing an existing patch script * to increase or reduce the number of lines of context within the script. * All header lines are reused as-is from the supplied FileHeader. * - * @param out - * stream to write the patch script out to. * @param head * existing file header containing the header lines to copy. * @param a @@ -100,8 +344,8 @@ public class DiffFormatter { * @throws IOException * writing to the supplied stream failed. */ - public void format(final OutputStream out, final FileHeader head, - final RawText a, final RawText b) throws IOException { + public void format(final FileHeader head, final RawText a, final RawText b) + throws IOException { // Reuse the existing FileHeader as-is by blindly copying its // header lines, but avoiding its hunks. Instead we recreate // the hunks from the text instances we have been supplied. @@ -112,19 +356,22 @@ public class DiffFormatter { end = head.getHunks().get(0).getStartOffset(); out.write(head.getBuffer(), start, end - start); - formatEdits(out, a, b, head.toEditList()); + formatEdits(a, b, head.toEditList()); } /** * Formats a list of edits in unified diff format - * @param out where the unified diff is written to - * @param a the text A which was compared - * @param b the text B which was compared - * @param edits some differences which have been calculated between A and B + * + * @param a + * the text A which was compared + * @param b + * the text B which was compared + * @param edits + * some differences which have been calculated between A and B * @throws IOException */ - public void formatEdits(final OutputStream out, final RawText a, - final RawText b, final EditList edits) throws IOException { + public void formatEdits(final RawText a, final RawText b, + final EditList edits) throws IOException { for (int curIdx = 0; curIdx < edits.size();) { Edit curEdit = edits.get(curIdx); final int endIdx = findCombinedEnd(edits, curIdx); @@ -135,18 +382,24 @@ public class DiffFormatter { final int aEnd = Math.min(a.size(), endEdit.getEndA() + context); final int bEnd = Math.min(b.size(), endEdit.getEndB() + context); - writeHunkHeader(out, aCur, aEnd, bCur, bEnd); + writeHunkHeader(aCur, aEnd, bCur, bEnd); while (aCur < aEnd || bCur < bEnd) { if (aCur < curEdit.getBeginA() || endIdx + 1 < curIdx) { - writeContextLine(out, a, aCur, isEndOfLineMissing(a, aCur)); + writeContextLine(a, aCur); + if (isEndOfLineMissing(a, aCur)) + out.write(noNewLine); aCur++; bCur++; } else if (aCur < curEdit.getEndA()) { - writeRemovedLine(out, a, aCur, isEndOfLineMissing(a, aCur)); + writeRemovedLine(a, aCur); + if (isEndOfLineMissing(a, aCur)) + out.write(noNewLine); aCur++; } else if (bCur < curEdit.getEndB()) { - writeAddedLine(out, b, bCur, isEndOfLineMissing(b, bCur)); + writeAddedLine(b, bCur); + if (isEndOfLineMissing(b, bCur)) + out.write(noNewLine); bCur++; } @@ -157,21 +410,17 @@ public class DiffFormatter { } /** - * Output a line of diff context + * Output a line of context (unmodified line). * - * @param out - * OutputStream * @param text * RawText for accessing raw data * @param line * the line number within text - * @param endOfLineMissing - * true if we should add the GNU end of line missing warning * @throws IOException */ - protected void writeContextLine(final OutputStream out, final RawText text, - final int line, boolean endOfLineMissing) throws IOException { - writeLine(out, ' ', text, line, endOfLineMissing); + protected void writeContextLine(final RawText text, final int line) + throws IOException { + writeLine(' ', text, line); } private boolean isEndOfLineMissing(final RawText text, final int line) { @@ -179,46 +428,36 @@ public class DiffFormatter { } /** - * Output an added line + * Output an added line. * - * @param out - * OutputStream * @param text * RawText for accessing raw data * @param line * the line number within text - * @param endOfLineMissing - * true if we should add the gnu end of line missing warning * @throws IOException */ - protected void writeAddedLine(final OutputStream out, final RawText text, final int line, boolean endOfLineMissing) + protected void writeAddedLine(final RawText text, final int line) throws IOException { - writeLine(out, '+', text, line, endOfLineMissing); + writeLine('+', text, line); } /** * Output a removed line * - * @param out - * OutputStream * @param text * RawText for accessing raw data * @param line * the line number within text - * @param endOfLineMissing - * true if we should add the gnu end of line missing warning * @throws IOException */ - protected void writeRemovedLine(final OutputStream out, final RawText text, - final int line, boolean endOfLineMissing) throws IOException { - writeLine(out, '-', text, line, endOfLineMissing); + protected void writeRemovedLine(final RawText text, final int line) + throws IOException { + writeLine('-', text, line); } /** * Output a hunk header * - * @param out - * OutputStream * @param aStartLine * within first source * @param aEndLine @@ -229,20 +468,20 @@ public class DiffFormatter { * within second source * @throws IOException */ - protected void writeHunkHeader(final OutputStream out, int aStartLine, int aEndLine, + protected void writeHunkHeader(int aStartLine, int aEndLine, int bStartLine, int bEndLine) throws IOException { out.write('@'); out.write('@'); - writeRange(out, '-', aStartLine + 1, aEndLine - aStartLine); - writeRange(out, '+', bStartLine + 1, bEndLine - bStartLine); + writeRange('-', aStartLine + 1, aEndLine - aStartLine); + writeRange('+', bStartLine + 1, bEndLine - bStartLine); out.write(' '); out.write('@'); out.write('@'); out.write('\n'); } - private static void writeRange(final OutputStream out, final char prefix, - final int begin, final int cnt) throws IOException { + private void writeRange(final char prefix, final int begin, final int cnt) + throws IOException { out.write(' '); out.write(prefix); switch (cnt) { @@ -271,18 +510,75 @@ public class DiffFormatter { } } - private static void writeLine(final OutputStream out, final char prefix, - final RawText text, final int cur, boolean noNewLineIndicator) throws IOException { + /** + * Write a standard patch script line. + * + * @param prefix + * prefix before the line, typically '-', '+', ' '. + * @param text + * the text object to obtain the line from. + * @param cur + * line number to output. + * @throws IOException + * the stream threw an exception while writing to it. + */ + protected void writeLine(final char prefix, final RawText text, + final int cur) throws IOException { out.write(prefix); text.writeLine(out, cur); out.write('\n'); - if (noNewLineIndicator) - writeNoNewLine(out); } - private static void writeNoNewLine(final OutputStream out) - throws IOException { - out.write(noNewLine); + /** + * Creates a {@link FileHeader} representing the given {@link DiffEntry} + * <p> + * This method does not use the OutputStream associated with this + * DiffFormatter instance. It is therefore safe to instantiate this + * DiffFormatter instance with a {@link DisabledOutputStream} if this method + * is the only one that will be used. + * + * @param ent + * the DiffEntry to create the FileHeader for + * @return a FileHeader representing the DiffEntry. The FileHeader's buffer + * will contain only the header of the diff output. It will also + * contain one {@link HunkHeader}. + * @throws IOException + * the stream threw an exception while writing to it, or one of + * the blobs referenced by the DiffEntry could not be read. + * @throws CorruptObjectException + * one of the blobs referenced by the DiffEntry is corrupt. + * @throws MissingObjectException + * one of the blobs referenced by the DiffEntry is missing. + */ + public FileHeader createFileHeader(DiffEntry ent) throws IOException, + CorruptObjectException, MissingObjectException { + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + final EditList editList; + final FileHeader.PatchType type; + + writeDiffHeader(buf, ent); + + if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) { + writeGitLinkDiffText(buf, ent); + editList = new EditList(); + type = PatchType.UNIFIED; + } else { + byte[] aRaw = open(ent.getOldMode(), ent.getOldId()); + byte[] bRaw = open(ent.getNewMode(), ent.getNewId()); + + if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { + buf.write(encodeASCII("Binary files differ\n")); + editList = new EditList(); + type = PatchType.BINARY; + } else { + RawText a = rawTextFactory.create(aRaw); + RawText b = rawTextFactory.create(bRaw); + editList = new MyersDiff(a, b).getEdits(); + type = PatchType.UNIFIED; + } + } + + return new FileHeader(buf.toByteArray(), editList, type); } private int findCombinedEnd(final List<Edit> edits, final int i) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index c01cb7ad8e..574d2edf91 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -65,6 +65,25 @@ import org.eclipse.jgit.util.RawParseUtils; * they are converting from "line number" to "element index". */ public class RawText implements Sequence { + /** Creates a RawText instance. */ + public static interface Factory { + /** + * Construct a RawText instance for the content. + * + * @param input + * the content array. + * @return a RawText instance wrapping this content. + */ + RawText create(byte[] input); + } + + /** Creates RawText that does not treat whitespace specially. */ + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawText(input); + } + }; + /** Number of bytes to check for heuristics in {@link #isBinary(byte[])} */ private static final int FIRST_FEW_BYTES = 8000; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java index f72259605e..211618a3fb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java @@ -51,6 +51,13 @@ import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; * A version of {@link RawText} that ignores all whitespace. */ public class RawTextIgnoreAllWhitespace extends RawText { + /** Creates RawText that ignores all whitespace. */ + @SuppressWarnings("hiding") + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawTextIgnoreAllWhitespace(input); + } + }; /** * Create a new sequence from an existing content byte array. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreLeadingWhitespace.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreLeadingWhitespace.java index 1d928766a9..23778973b7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreLeadingWhitespace.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreLeadingWhitespace.java @@ -50,6 +50,13 @@ import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace; * A version of {@link RawText} that ignores leading whitespace. */ public class RawTextIgnoreLeadingWhitespace extends RawText { + /** Creates RawText that ignores only leading whitespace. */ + @SuppressWarnings("hiding") + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawTextIgnoreLeadingWhitespace(input); + } + }; /** * Create a new sequence from an existing content byte array. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreTrailingWhitespace.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreTrailingWhitespace.java index b9095940cf..3feb2e783a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreTrailingWhitespace.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreTrailingWhitespace.java @@ -50,6 +50,13 @@ import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; * A version of {@link RawText} that ignores trailing whitespace. */ public class RawTextIgnoreTrailingWhitespace extends RawText { + /** Creates RawText that ignores only trailing whitespace. */ + @SuppressWarnings("hiding") + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawTextIgnoreTrailingWhitespace(input); + } + }; /** * Create a new sequence from an existing content byte array. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreWhitespaceChange.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreWhitespaceChange.java index 399f038bca..e6bd8e98b7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreWhitespaceChange.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreWhitespaceChange.java @@ -45,14 +45,21 @@ package org.eclipse.jgit.diff; import static org.eclipse.jgit.util.RawCharUtil.isWhitespace; -import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace; +import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; /** * A version of {@link RawText} that ignores changes in the amount of * whitespace, as well as trailing whitespace. */ public class RawTextIgnoreWhitespaceChange extends RawText { + /** Creates RawText that ignores only whitespace changes. */ + @SuppressWarnings("hiding") + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawTextIgnoreWhitespaceChange(input); + } + }; /** * Create a new sequence from an existing content byte array. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java new file mode 100644 index 0000000000..bc23940535 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -0,0 +1,536 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; + +import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.lib.Repository; + +/** Detect and resolve object renames. */ +public class RenameDetector { + private static final int EXACT_RENAME_SCORE = 100; + + private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() { + public int compare(DiffEntry a, DiffEntry b) { + int cmp = nameOf(a).compareTo(nameOf(b)); + if (cmp == 0) + cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType()); + return cmp; + } + + private String nameOf(DiffEntry ent) { + // Sort by the new name, unless the change is a delete. On + // deletes the new name is /dev/null, so we sort instead by + // the old name. + // + if (ent.changeType == ChangeType.DELETE) + return ent.oldName; + return ent.newName; + } + + private int sortOf(ChangeType changeType) { + // Sort deletes before adds so that a major type change for + // a file path (such as symlink to regular file) will first + // remove the path, then add it back with the new type. + // + switch (changeType) { + case DELETE: + return 1; + case ADD: + return 2; + default: + return 10; + } + } + }; + + private final List<DiffEntry> entries = new ArrayList<DiffEntry>(); + + private List<DiffEntry> deleted = new ArrayList<DiffEntry>(); + + private List<DiffEntry> added = new ArrayList<DiffEntry>(); + + private boolean done; + + private final Repository repo; + + /** Similarity score required to pair an add/delete as a rename. */ + private int renameScore = 60; + + /** Limit in the number of files to consider for renames. */ + private int renameLimit; + + /** Set if the number of adds or deletes was over the limit. */ + private boolean overRenameLimit; + + /** + * Create a new rename detector for the given repository + * + * @param repo + * the repository to use for rename detection + */ + public RenameDetector(Repository repo) { + this.repo = repo; + + DiffConfig cfg = repo.getConfig().get(DiffConfig.KEY); + renameLimit = cfg.getRenameLimit(); + } + + /** + * @return minimum score required to pair an add/delete as a rename. The + * score ranges are within the bounds of (0, 100). + */ + public int getRenameScore() { + return renameScore; + } + + /** + * Set the minimum score required to pair an add/delete as a rename. + * <p> + * When comparing two files together their score must be greater than or + * equal to the rename score for them to be considered a rename match. The + * score is computed based on content similarity, so a score of 60 implies + * that approximately 60% of the bytes in the files are identical. + * + * @param score + * new rename score, must be within [0, 100]. + * @throws IllegalArgumentException + * the score was not within [0, 100]. + */ + public void setRenameScore(int score) { + if (score < 0 || score > 100) + throw new IllegalArgumentException( + JGitText.get().similarityScoreMustBeWithinBounds); + renameScore = score; + } + + /** @return limit on number of paths to perform inexact rename detection. */ + public int getRenameLimit() { + return renameLimit; + } + + /** + * Set the limit on the number of files to perform inexact rename detection. + * <p> + * The rename detector has to build a square matrix of the rename limit on + * each side, then perform that many file compares to determine similarity. + * If 1000 files are added, and 1000 files are deleted, a 1000*1000 matrix + * must be allocated, and 1,000,000 file compares may need to be performed. + * + * @param limit + * new file limit. + */ + public void setRenameLimit(int limit) { + renameLimit = limit; + } + + /** + * Check if the detector is over the rename limit. + * <p> + * This method can be invoked either before or after {@code getEntries} has + * been used to perform rename detection. + * + * @return true if the detector has more file additions or removals than the + * rename limit is currently set to. In such configurations the + * detector will skip expensive computation. + */ + public boolean isOverRenameLimit() { + if (done) + return overRenameLimit; + int cnt = Math.max(added.size(), deleted.size()); + return getRenameLimit() != 0 && getRenameLimit() < cnt; + } + + /** + * Add entries to be considered for rename detection. + * + * @param entriesToAdd + * one or more entries to add. + * @throws IllegalStateException + * if {@code getEntries} was already invoked. + */ + public void addAll(Collection<DiffEntry> entriesToAdd) { + if (done) + throw new IllegalStateException(JGitText.get().renamesAlreadyFound); + + for (DiffEntry entry : entriesToAdd) { + switch (entry.getChangeType()) { + case ADD: + added.add(entry); + break; + + case DELETE: + deleted.add(entry); + break; + + case MODIFY: + if (sameType(entry.getOldMode(), entry.getNewMode())) + entries.add(entry); + else + entries.addAll(DiffEntry.breakModify(entry)); + break; + + case COPY: + case RENAME: + default: + entriesToAdd.add(entry); + } + } + } + + /** + * Add an entry to be considered for rename detection. + * + * @param entry + * to add. + * @throws IllegalStateException + * if {@code getEntries} was already invoked. + */ + public void add(DiffEntry entry) { + addAll(Collections.singletonList(entry)); + } + + /** + * Detect renames in the current file set. + * <p> + * This convenience function runs without a progress monitor. + * + * @return an unmodifiable list of {@link DiffEntry}s representing all files + * that have been changed. + * @throws IOException + * file contents cannot be read from the repository. + */ + public List<DiffEntry> compute() throws IOException { + return compute(NullProgressMonitor.INSTANCE); + } + + /** + * Detect renames in the current file set. + * + * @param pm + * report progress during the detection phases. + * @return an unmodifiable list of {@link DiffEntry}s representing all files + * that have been changed. + * @throws IOException + * file contents cannot be read from the repository. + */ + public List<DiffEntry> compute(ProgressMonitor pm) throws IOException { + if (!done) { + done = true; + + if (pm == null) + pm = NullProgressMonitor.INSTANCE; + findExactRenames(pm); + findContentRenames(pm); + + entries.addAll(added); + added = null; + + entries.addAll(deleted); + deleted = null; + + Collections.sort(entries, DIFF_COMPARATOR); + } + return Collections.unmodifiableList(entries); + } + + private void findContentRenames(ProgressMonitor pm) throws IOException { + int cnt = Math.max(added.size(), deleted.size()); + if (cnt == 0) + return; + + if (getRenameLimit() == 0 || cnt <= getRenameLimit()) { + SimilarityRenameDetector d; + + d = new SimilarityRenameDetector(repo, deleted, added); + d.setRenameScore(getRenameScore()); + d.compute(pm); + deleted = d.getLeftOverSources(); + added = d.getLeftOverDestinations(); + entries.addAll(d.getMatches()); + } else { + overRenameLimit = true; + } + } + + @SuppressWarnings("unchecked") + private void findExactRenames(ProgressMonitor pm) { + if (added.isEmpty() || deleted.isEmpty()) + return; + + pm.beginTask(JGitText.get().renamesFindingExact, // + added.size() + added.size() + deleted.size() + + added.size() * deleted.size()); + + HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm); + HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm); + + ArrayList<DiffEntry> uniqueAdds = new ArrayList<DiffEntry>(added.size()); + ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<List<DiffEntry>>(); + + for (Object o : addedMap.values()) { + if (o instanceof DiffEntry) + uniqueAdds.add((DiffEntry) o); + else + nonUniqueAdds.add((List<DiffEntry>) o); + } + + ArrayList<DiffEntry> left = new ArrayList<DiffEntry>(added.size()); + + for (DiffEntry a : uniqueAdds) { + Object del = deletedMap.get(a.newId); + if (del instanceof DiffEntry) { + // We have one add to one delete: pair them if they are the same + // type + DiffEntry e = (DiffEntry) del; + if (sameType(e.oldMode, a.newMode)) { + e.changeType = ChangeType.RENAME; + entries.add(exactRename(e, a)); + } else { + left.add(a); + } + } else if (del != null) { + // We have one add to many deletes: find the delete with the + // same type and closest name to the add, then pair them + List<DiffEntry> list = (List<DiffEntry>) del; + DiffEntry best = bestPathMatch(a, list); + if (best != null) { + best.changeType = ChangeType.RENAME; + entries.add(exactRename(best, a)); + } else { + left.add(a); + } + } else { + left.add(a); + } + pm.update(1); + } + + for (List<DiffEntry> adds : nonUniqueAdds) { + Object o = deletedMap.get(adds.get(0).newId); + if (o instanceof DiffEntry) { + // We have many adds to one delete: find the add with the same + // type and closest name to the delete, then pair them. Mark the + // rest as copies of the delete. + DiffEntry d = (DiffEntry) o; + DiffEntry best = bestPathMatch(d, adds); + if (best != null) { + d.changeType = ChangeType.RENAME; + entries.add(exactRename(d, best)); + for (DiffEntry a : adds) { + if (a != best) { + if (sameType(d.oldMode, a.newMode)) { + entries.add(exactCopy(d, a)); + } else { + left.add(a); + } + } + } + } else { + left.addAll(adds); + } + } else { + // We have many adds to many deletes: score all the adds against + // all the deletes by path name, take the best matches, pair + // them as renames, then call the rest copies + List<DiffEntry> dels = (List<DiffEntry>) o; + long[] matrix = new long[dels.size() * adds.size()]; + int mNext = 0; + for (int addIdx = 0; addIdx < adds.size(); addIdx++) { + String addedName = adds.get(addIdx).newName; + + for (int delIdx = 0; delIdx < dels.size(); delIdx++) { + String deletedName = dels.get(delIdx).oldName; + + int score = SimilarityRenameDetector.nameScore(addedName, deletedName); + matrix[mNext] = SimilarityRenameDetector.encode(score, addIdx, delIdx); + mNext++; + } + } + + Arrays.sort(matrix); + + for (--mNext; mNext >= 0; mNext--) { + long ent = matrix[mNext]; + int delIdx = SimilarityRenameDetector.srcFile(ent); + int addIdx = SimilarityRenameDetector.dstFile(ent); + DiffEntry d = dels.get(delIdx); + DiffEntry a = adds.get(addIdx); + + if (a == null) { + pm.update(1); + continue; // was already matched earlier + } + + ChangeType type; + if (d.changeType == ChangeType.DELETE) { + // First use of this source file. Tag it as a rename so we + // later know it is already been used as a rename, other + // matches (if any) will claim themselves as copies instead. + // + d.changeType = ChangeType.RENAME; + type = ChangeType.RENAME; + } else { + type = ChangeType.COPY; + } + + entries.add(DiffEntry.pair(type, d, a, 100)); + adds.set(addIdx, null); // Claim the destination was matched. + pm.update(1); + } + } + } + added = left; + + deleted = new ArrayList<DiffEntry>(deletedMap.size()); + for (Object o : deletedMap.values()) { + if (o instanceof DiffEntry) { + DiffEntry e = (DiffEntry) o; + if (e.changeType == ChangeType.DELETE) + deleted.add(e); + } else { + List<DiffEntry> list = (List<DiffEntry>) o; + for (DiffEntry e : list) { + if (e.changeType == ChangeType.DELETE) + deleted.add(e); + } + } + } + pm.endTask(); + } + + /** + * Find the best match by file path for a given DiffEntry from a list of + * DiffEntrys. The returned DiffEntry will be of the same type as <src>. If + * no DiffEntry can be found that has the same type, this method will return + * null. + * + * @param src + * the DiffEntry to try to find a match for + * @param list + * a list of DiffEntrys to search through + * @return the DiffEntry from <list> who's file path best matches <src> + */ + private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) { + DiffEntry best = null; + int score = -1; + + for (DiffEntry d : list) { + if (sameType(mode(d), mode(src))) { + int tmp = SimilarityRenameDetector + .nameScore(path(d), path(src)); + if (tmp > score) { + best = d; + score = tmp; + } + } + } + + return best; + } + + @SuppressWarnings("unchecked") + private HashMap<AbbreviatedObjectId, Object> populateMap( + List<DiffEntry> diffEntries, ProgressMonitor pm) { + HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>(); + for (DiffEntry de : diffEntries) { + Object old = map.put(id(de), de); + if (old instanceof DiffEntry) { + ArrayList<DiffEntry> list = new ArrayList<DiffEntry>(2); + list.add((DiffEntry) old); + list.add(de); + map.put(id(de), list); + } else if (old != null) { + // Must be a list of DiffEntries + ((List<DiffEntry>) old).add(de); + map.put(id(de), old); + } + pm.update(1); + } + return map; + } + + private static String path(DiffEntry de) { + return de.changeType == ChangeType.DELETE ? de.oldName : de.newName; + } + + private static FileMode mode(DiffEntry de) { + return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode; + } + + private static AbbreviatedObjectId id(DiffEntry de) { + return de.changeType == ChangeType.DELETE ? de.oldId : de.newId; + } + + static boolean sameType(FileMode a, FileMode b) { + // Files have to be of the same type in order to rename them. + // We would never want to rename a file to a gitlink, or a + // symlink to a file. + // + int aType = a.getBits() & FileMode.TYPE_MASK; + int bType = b.getBits() & FileMode.TYPE_MASK; + return aType == bType; + } + + private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) { + return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE); + } + + private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) { + return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java new file mode 100644 index 0000000000..d5a31d6044 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.util.Arrays; + +import org.eclipse.jgit.lib.ObjectLoader; + +/** + * Index structure of lines/blocks in one file. + * <p> + * This structure can be used to compute an approximation of the similarity + * between two files. The index is used by {@link SimilarityRenameDetector} to + * compute scores between files. + * <p> + * To save space in memory, this index uses a space efficient encoding which + * will not exceed 1 MiB per instance. The index starts out at a smaller size + * (closer to 2 KiB), but may grow as more distinct blocks within the scanned + * file are discovered. + */ +class SimilarityIndex { + /** The {@link #idHash} table stops growing at {@code 1 << MAX_HASH_BITS}. */ + private static final int MAX_HASH_BITS = 17; + + /** The {@link #idHash} table will not grow bigger than this, ever. */ + private static final int MAX_HASH_SIZE = 1 << MAX_HASH_BITS; + + /** Prime just before {@link #MAX_HASH_SIZE}. */ + private static final int P = 131071; + + /** + * Shift to apply before storing a key. + * <p> + * Within the 64 bit table record space, we leave the highest bit unset so + * all values are positive, and we need {@link #MAX_HASH_BITS} bits for the + * keys. The lower 32 bits are used to count bytes impacted. + */ + private static final int KEY_SHIFT = 64 - 1 - MAX_HASH_BITS; + + /** Total size of the file we hashed into the structure. */ + private long fileSize; + + /** Number of non-zero entries in {@link #idHash}. */ + private int idSize; + + /** + * Pairings of content keys and counters. + * <p> + * Slots in the table are actually two ints wedged into a single long. The + * upper {@link #MAX_HASH_BITS} bits stores the content key, and the + * remaining lower bits stores the number of bytes associated with that key. + * Empty slots are denoted by 0, which cannot occur because the count cannot + * be 0. Values can only be positive, which we enforce during key addition. + */ + private long[] idHash; + + SimilarityIndex() { + idHash = new long[256]; + } + + long getFileSize() { + return fileSize; + } + + void setFileSize(long size) { + fileSize = size; + } + + void hash(ObjectLoader obj) { + byte[] raw = obj.getCachedBytes(); + setFileSize(raw.length); + hash(raw, 0, raw.length); + } + + void hash(byte[] raw, int ptr, final int end) { + while (ptr < end) { + int hash = 5381; + int start = ptr; + + // Hash one line, or one block, whichever occurs first. + do { + int c = raw[ptr++] & 0xff; + if (c == '\n') + break; + hash = (hash << 5) ^ c; + } while (ptr < end && ptr - start < 64); + add(hash, ptr - start); + } + } + + /** + * Sort the internal table so it can be used for efficient scoring. + * <p> + * Once sorted, additional lines/blocks cannot be added to the index. + */ + void sort() { + // Sort the array. All of the empty space will wind up at the front, + // because we forced all of the keys to always be positive. Later + // we only work with the back half of the array. + // + Arrays.sort(idHash); + } + + int score(SimilarityIndex dst, int maxScore) { + long max = Math.max(fileSize, dst.fileSize); + if (max == 0) + return maxScore; + return (int) ((common(dst) * maxScore) / max); + } + + int common(SimilarityIndex dst) { + return common(this, dst); + } + + private static int common(SimilarityIndex src, SimilarityIndex dst) { + int srcIdx = src.packedIndex(0); + int dstIdx = dst.packedIndex(0); + long[] srcHash = src.idHash; + long[] dstHash = dst.idHash; + return common(srcHash, srcIdx, dstHash, dstIdx); + } + + private static int common(long[] srcHash, int srcIdx, // + long[] dstHash, int dstIdx) { + if (srcIdx == srcHash.length || dstIdx == dstHash.length) + return 0; + + int common = 0; + int srcKey = keyOf(srcHash[srcIdx]); + int dstKey = keyOf(dstHash[dstIdx]); + + for (;;) { + if (srcKey == dstKey) { + common += countOf(dstHash[dstIdx]); + + if (++srcIdx == srcHash.length) + break; + srcKey = keyOf(srcHash[srcIdx]); + + if (++dstIdx == dstHash.length) + break; + dstKey = keyOf(dstHash[dstIdx]); + + } else if (srcKey < dstKey) { + // Regions of src which do not appear in dst. + if (++srcIdx == srcHash.length) + break; + srcKey = keyOf(srcHash[srcIdx]); + + } else /* if (srcKey > dstKey) */{ + // Regions of dst which do not appear in dst. + if (++dstIdx == dstHash.length) + break; + dstKey = keyOf(dstHash[dstIdx]); + } + } + + return common; + } + + // Testing only + int size() { + return idSize; + } + + // Testing only + int key(int idx) { + return keyOf(idHash[packedIndex(idx)]); + } + + // Testing only + long count(int idx) { + return countOf(idHash[packedIndex(idx)]); + } + + // Brute force approach only for testing. + int findIndex(int key) { + for (int i = 0; i < idSize; i++) + if (key(i) == key) + return i; + return -1; + } + + private int packedIndex(int idx) { + return (idHash.length - idSize) + idx; + } + + void add(int key, int cnt) { + key = hash(key); + int j = slot(key); + for (;;) { + long v = idHash[j]; + if (v == 0) { + // Empty slot in the table, store here. + if (shouldGrow()) { + grow(); + j = slot(key); + continue; + } + idHash[j] = (((long) key) << KEY_SHIFT) | cnt; + idSize++; + return; + + } else if (keyOf(v) == key) { + // Same key, increment the counter. + idHash[j] = v + cnt; + return; + + } else if (++j >= idHash.length) { + j = 0; + } + } + } + + private static int hash(int key) { + // Make the key fit into our table. Since we have a maximum size + // that we cap the table at, all keys get squashed before going + // into the table. This prevents overflow. + // + return (key >>> 1) % P; + } + + private int slot(int key) { + return key % idHash.length; + } + + private boolean shouldGrow() { + int n = idHash.length; + return n < MAX_HASH_SIZE && n <= idSize * 2; + } + + private void grow() { + long[] oldHash = idHash; + int oldSize = idHash.length; + + idHash = new long[2 * oldSize]; + for (int i = 0; i < oldSize; i++) { + long v = oldHash[i]; + if (v != 0) { + int j = slot(keyOf(v)); + while (idHash[j] != 0) + if (++j >= idHash.length) + j = 0; + idHash[j] = v; + } + } + } + + private static int keyOf(long v) { + return (int) (v >>> KEY_SHIFT); + } + + private static int countOf(long v) { + return (int) v; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java new file mode 100644 index 0000000000..e2115f0acc --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java @@ -0,0 +1,381 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.lib.Repository; + +class SimilarityRenameDetector { + /** + * Number of bits we need to express an index into src or dst list. + * <p> + * This must be 28, giving us a limit of 2^28 entries in either list, which + * is an insane limit of 536,870,912 file names being considered in a single + * rename pass. The other 8 bits are used to store the score, while staying + * under 127 so the long doesn't go negative. + */ + private static final int BITS_PER_INDEX = 28; + + private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1; + + private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX; + + private final Repository repo; + + /** + * All sources to consider for copies or renames. + * <p> + * A source is typically a {@link ChangeType#DELETE} change, but could be + * another type when trying to perform copy detection concurrently with + * rename detection. + */ + private List<DiffEntry> srcs; + + /** + * All destinations to consider looking for a rename. + * <p> + * A destination is typically an {@link ChangeType#ADD}, as the name has + * just come into existence, and we want to discover where its initial + * content came from. + */ + private List<DiffEntry> dsts; + + /** + * Matrix of all examined file pairs, and their scores. + * <p> + * The upper 8 bits of each long stores the score, but the score is bounded + * to be in the range (0, 128] so that the highest bit is never set, and all + * entries are therefore positive. + * <p> + * List indexes to an element of {@link #srcs} and {@link #dsts} are encoded + * as the lower two groups of 28 bits, respectively, but the encoding is + * inverted, so that 0 is expressed as {@code (1 << 28) - 1}. This sorts + * lower list indices later in the matrix, giving precedence to files whose + * names sort earlier in the tree. + */ + private long[] matrix; + + /** Score a pair must exceed to be considered a rename. */ + private int renameScore = 60; + + private List<DiffEntry> out; + + SimilarityRenameDetector(Repository repo, List<DiffEntry> srcs, + List<DiffEntry> dsts) { + this.repo = repo; + this.srcs = srcs; + this.dsts = dsts; + } + + void setRenameScore(int score) { + renameScore = score; + } + + void compute(ProgressMonitor pm) throws IOException { + if (pm == null) + pm = NullProgressMonitor.INSTANCE; + + pm.beginTask(JGitText.get().renamesFindingByContent, // + 2 * srcs.size() * dsts.size()); + + int mNext = buildMatrix(pm); + out = new ArrayList<DiffEntry>(Math.min(mNext, dsts.size())); + + // Match rename pairs on a first come, first serve basis until + // we have looked at everything that is above our minimum score. + // + for (--mNext; mNext >= 0; mNext--) { + long ent = matrix[mNext]; + int sIdx = srcFile(ent); + int dIdx = dstFile(ent); + DiffEntry s = srcs.get(sIdx); + DiffEntry d = dsts.get(dIdx); + + if (d == null) { + pm.update(1); + continue; // was already matched earlier + } + + ChangeType type; + if (s.changeType == ChangeType.DELETE) { + // First use of this source file. Tag it as a rename so we + // later know it is already been used as a rename, other + // matches (if any) will claim themselves as copies instead. + // + s.changeType = ChangeType.RENAME; + type = ChangeType.RENAME; + } else { + type = ChangeType.COPY; + } + + out.add(DiffEntry.pair(type, s, d, score(ent))); + dsts.set(dIdx, null); // Claim the destination was matched. + pm.update(1); + } + + srcs = compactSrcList(srcs); + dsts = compactDstList(dsts); + pm.endTask(); + } + + List<DiffEntry> getMatches() { + return out; + } + + List<DiffEntry> getLeftOverSources() { + return srcs; + } + + List<DiffEntry> getLeftOverDestinations() { + return dsts; + } + + private static List<DiffEntry> compactSrcList(List<DiffEntry> in) { + ArrayList<DiffEntry> r = new ArrayList<DiffEntry>(in.size()); + for (DiffEntry e : in) { + if (e.changeType == ChangeType.DELETE) + r.add(e); + } + return r; + } + + private static List<DiffEntry> compactDstList(List<DiffEntry> in) { + ArrayList<DiffEntry> r = new ArrayList<DiffEntry>(in.size()); + for (DiffEntry e : in) { + if (e != null) + r.add(e); + } + return r; + } + + private int buildMatrix(ProgressMonitor pm) throws IOException { + // Allocate for the worst-case scenario where every pair has a + // score that we need to consider. We might not need that many. + // + matrix = new long[srcs.size() * dsts.size()]; + + long[] srcSizes = new long[srcs.size()]; + long[] dstSizes = new long[dsts.size()]; + + // Init the size arrays to some value that indicates that we haven't + // calculated the size yet. Since sizes cannot be negative, -1 will work + Arrays.fill(srcSizes, -1); + Arrays.fill(dstSizes, -1); + + // Consider each pair of files, if the score is above the minimum + // threshold we need record that scoring in the matrix so we can + // later find the best matches. + // + int mNext = 0; + for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) { + DiffEntry srcEnt = srcs.get(srcIdx); + if (!isFile(srcEnt.oldMode)) { + pm.update(dsts.size()); + continue; + } + + SimilarityIndex s = hash(srcEnt.oldId.toObjectId()); + for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) { + DiffEntry dstEnt = dsts.get(dstIdx); + + if (!isFile(dstEnt.newMode)) { + pm.update(1); + continue; + } + + if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) { + pm.update(1); + continue; + } + + long srcSize = srcSizes[srcIdx]; + if (srcSize < 0) { + srcSize = size(srcEnt.oldId.toObjectId()); + srcSizes[srcIdx] = srcSize; + } + + long dstSize = dstSizes[dstIdx]; + if (dstSize < 0) { + dstSize = size(dstEnt.newId.toObjectId()); + dstSizes[dstIdx] = dstSize; + } + + long max = Math.max(srcSize, dstSize); + long min = Math.min(srcSize, dstSize); + if (min * 100 / max < renameScore) { + // Cannot possibly match, as the file sizes are so different + pm.update(1); + continue; + } + + SimilarityIndex d = hash(dstEnt.newId.toObjectId()); + int contentScore = s.score(d, 10000); + + // nameScore returns a value between 0 and 100, but we want it + // to be in the same range as the content score. This allows it + // to be dropped into the pretty formula for the final score. + int nameScore = nameScore(srcEnt.oldName, dstEnt.newName) * 100; + + int score = (contentScore * 99 + nameScore * 1) / 10000; + + if (score < renameScore) { + pm.update(1); + continue; + } + + matrix[mNext++] = encode(score, srcIdx, dstIdx); + pm.update(1); + } + } + + // Sort everything in the range we populated, which might be the + // entire matrix, or just a smaller slice if we had some bad low + // scoring pairs. + // + Arrays.sort(matrix, 0, mNext); + return mNext; + } + + static int nameScore(String a, String b) { + int aDirLen = a.lastIndexOf("/") + 1; + int bDirLen = b.lastIndexOf("/") + 1; + + int dirMin = Math.min(aDirLen, bDirLen); + int dirMax = Math.max(aDirLen, bDirLen); + + final int dirScoreLtr; + final int dirScoreRtl; + + if (dirMax == 0) { + dirScoreLtr = 100; + dirScoreRtl = 100; + } else { + int dirSim = 0; + for (; dirSim < dirMin; dirSim++) { + if (a.charAt(dirSim) != b.charAt(dirSim)) + break; + } + dirScoreLtr = (dirSim * 100) / dirMax; + + if (dirScoreLtr == 100) { + dirScoreRtl = 100; + } else { + for (dirSim = 0; dirSim < dirMin; dirSim++) { + if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1 + - dirSim)) + break; + } + dirScoreRtl = (dirSim * 100) / dirMax; + } + } + + int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen); + int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen); + + int fileSim = 0; + for (; fileSim < fileMin; fileSim++) { + if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1 + - fileSim)) + break; + } + int fileScore = (fileSim * 100) / fileMax; + + return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100; + } + + private SimilarityIndex hash(ObjectId objectId) throws IOException { + SimilarityIndex r = new SimilarityIndex(); + r.hash(repo.openObject(objectId)); + r.sort(); + return r; + } + + private long size(ObjectId objectId) throws IOException { + return repo.openObject(objectId).getSize(); + } + + private static int score(long value) { + return (int) (value >>> SCORE_SHIFT); + } + + static int srcFile(long value) { + return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK); + } + + static int dstFile(long value) { + return decodeFile(((int) value) & INDEX_MASK); + } + + static long encode(int score, int srcIdx, int dstIdx) { + return (((long) score) << SCORE_SHIFT) // + | (encodeFile(srcIdx) << BITS_PER_INDEX) // + | encodeFile(dstIdx); + } + + private static long encodeFile(int idx) { + // We invert the index so that the first file in the list sorts + // later in the table. This permits us to break ties favoring + // earlier names over later ones. + // + return INDEX_MASK - idx; + } + + private static int decodeFile(int v) { + return INDEX_MASK - v; + } + + private static boolean isFile(FileMode mode) { + return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java index a150e8fea1..3f188fe0e7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java @@ -83,6 +83,21 @@ public final class AbbreviatedObjectId { } /** + * Convert an AbbreviatedObjectId from an {@link AnyObjectId}. + * <p> + * This method copies over all bits of the Id, and is therefore complete + * (see {@link #isComplete()}). + * + * @param id + * the {@link ObjectId} to convert from. + * @return the converted object id. + */ + public static final AbbreviatedObjectId fromObjectId(AnyObjectId id) { + return new AbbreviatedObjectId(Constants.OBJECT_ID_STRING_LENGTH, + id.w1, id.w2, id.w3, id.w4, id.w5); + } + + /** * Convert an AbbreviatedObjectId from hex characters. * * @param str diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java b/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java index 25dc72af20..0c24fc6be2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java @@ -60,6 +60,7 @@ import java.util.Collections; import java.util.List; import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.diff.EditList; import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.Constants; @@ -69,10 +70,7 @@ import org.eclipse.jgit.util.RawParseUtils; import org.eclipse.jgit.util.TemporaryBuffer; /** Patch header describing an action for a single file path. */ -public class FileHeader { - /** Magical file name used for file adds or deletes. */ - public static final String DEV_NULL = "/dev/null"; - +public class FileHeader extends DiffEntry { private static final byte[] OLD_MODE = encodeASCII("old mode "); private static final byte[] NEW_MODE = encodeASCII("new mode "); @@ -103,24 +101,6 @@ public class FileHeader { static final byte[] NEW_NAME = encodeASCII("+++ "); - /** General type of change a single file-level patch describes. */ - public static enum ChangeType { - /** Add a new file to the project */ - ADD, - - /** Modify an existing file in the project (content and/or mode) */ - MODIFY, - - /** Delete an existing file from the project */ - DELETE, - - /** Rename an existing file to a new location */ - RENAME, - - /** Copy an existing file to a new location, keeping the original */ - COPY; - } - /** Type of patch used by this file. */ public static enum PatchType { /** A traditional unified diff style patch of a text file. */ @@ -142,30 +122,6 @@ public class FileHeader { /** Position 1 past the end of this file within {@link #buf}. */ int endOffset; - /** File name of the old (pre-image). */ - private String oldName; - - /** File name of the new (post-image). */ - private String newName; - - /** Old mode of the file, if described by the patch, else null. */ - private FileMode oldMode; - - /** New mode of the file, if described by the patch, else null. */ - protected FileMode newMode; - - /** General type of change indicated by the patch. */ - protected ChangeType changeType; - - /** Similarity score if {@link #changeType} is a copy or rename. */ - private int score; - - /** ObjectId listed on the index line for the old (pre-image) */ - private AbbreviatedObjectId oldId; - - /** ObjectId listed on the index line for the new (post-image) */ - protected AbbreviatedObjectId newId; - /** Type of patch used to modify this file */ PatchType patchType; @@ -178,6 +134,25 @@ public class FileHeader { /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */ BinaryHunk reverseBinaryHunk; + /** + * Constructs a new FileHeader + * + * @param headerLines + * buffer holding the diff header for this file + * @param edits + * the edits for this file + * @param type + * the type of patch used to modify this file + */ + public FileHeader(final byte[] headerLines, EditList edits, PatchType type) { + this(headerLines, 0); + endOffset = headerLines.length; + int ptr = parseGitFileName(Patch.DIFF_GIT.length, headerLines.length); + ptr = parseGitHeaders(ptr, headerLines.length); + this.patchType = type; + addHunk(new HunkHeader(this, edits)); + } + FileHeader(final byte[] b, final int offset) { buf = b; startOffset = offset; @@ -312,86 +287,6 @@ public class FileHeader { } } - /** - * Get the old name associated with this file. - * <p> - * The meaning of the old name can differ depending on the semantic meaning - * of this patch: - * <ul> - * <li><i>file add</i>: always <code>/dev/null</code></li> - * <li><i>file modify</i>: always {@link #getNewName()}</li> - * <li><i>file delete</i>: always the file being deleted</li> - * <li><i>file copy</i>: source file the copy originates from</li> - * <li><i>file rename</i>: source file the rename originates from</li> - * </ul> - * - * @return old name for this file. - */ - public String getOldName() { - return oldName; - } - - /** - * Get the new name associated with this file. - * <p> - * The meaning of the new name can differ depending on the semantic meaning - * of this patch: - * <ul> - * <li><i>file add</i>: always the file being created</li> - * <li><i>file modify</i>: always {@link #getOldName()}</li> - * <li><i>file delete</i>: always <code>/dev/null</code></li> - * <li><i>file copy</i>: destination file the copy ends up at</li> - * <li><i>file rename</i>: destination file the rename ends up at/li> - * </ul> - * - * @return new name for this file. - */ - public String getNewName() { - return newName; - } - - /** @return the old file mode, if described in the patch */ - public FileMode getOldMode() { - return oldMode; - } - - /** @return the new file mode, if described in the patch */ - public FileMode getNewMode() { - return newMode; - } - - /** @return the type of change this patch makes on {@link #getNewName()} */ - public ChangeType getChangeType() { - return changeType; - } - - /** - * @return similarity score between {@link #getOldName()} and - * {@link #getNewName()} if {@link #getChangeType()} is - * {@link ChangeType#COPY} or {@link ChangeType#RENAME}. - */ - public int getScore() { - return score; - } - - /** - * Get the old object id from the <code>index</code>. - * - * @return the object id; null if there is no index line - */ - public AbbreviatedObjectId getOldId() { - return oldId; - } - - /** - * Get the new object id from the <code>index</code>. - * - * @return the object id; null if there is no index line - */ - public AbbreviatedObjectId getNewId() { - return newId; - } - /** @return style of patch used to modify this file */ public PatchType getPatchType() { return patchType; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/patch/HunkHeader.java b/org.eclipse.jgit/src/org/eclipse/jgit/patch/HunkHeader.java index bfb20b64e9..a6ea74eb16 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/patch/HunkHeader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/patch/HunkHeader.java @@ -116,6 +116,8 @@ public class HunkHeader { /** Total number of lines of context appearing in this hunk */ int nContext; + private EditList editList; + HunkHeader(final FileHeader fh, final int offset) { this(fh, offset, new OldImage() { @Override @@ -131,6 +133,21 @@ public class HunkHeader { old = oi; } + HunkHeader(final FileHeader fh, final EditList editList) { + this(fh, fh.buf.length); + this.editList = editList; + endOffset = startOffset; + nContext = 0; + if (editList.isEmpty()) { + newStartLine = 0; + newLineCount = 0; + } else { + newStartLine = editList.get(0).getBeginB(); + Edit last = editList.get(editList.size() - 1); + newLineCount = last.getEndB() - newStartLine; + } + } + /** @return header for the file this hunk applies to */ public FileHeader getFileHeader() { return file; @@ -173,48 +190,50 @@ public class HunkHeader { /** @return a list describing the content edits performed within the hunk. */ public EditList toEditList() { - final EditList r = new EditList(); - final byte[] buf = file.buf; - int c = nextLF(buf, startOffset); - int oLine = old.startLine; - int nLine = newStartLine; - Edit in = null; - - SCAN: for (; c < endOffset; c = nextLF(buf, c)) { - switch (buf[c]) { - case ' ': - case '\n': - in = null; - oLine++; - nLine++; - continue; - - case '-': - if (in == null) { - in = new Edit(oLine - 1, nLine - 1); - r.add(in); + if (editList == null) { + editList = new EditList(); + final byte[] buf = file.buf; + int c = nextLF(buf, startOffset); + int oLine = old.startLine; + int nLine = newStartLine; + Edit in = null; + + SCAN: for (; c < endOffset; c = nextLF(buf, c)) { + switch (buf[c]) { + case ' ': + case '\n': + in = null; + oLine++; + nLine++; + continue; + + case '-': + if (in == null) { + in = new Edit(oLine - 1, nLine - 1); + editList.add(in); + } + oLine++; + in.extendA(); + continue; + + case '+': + if (in == null) { + in = new Edit(oLine - 1, nLine - 1); + editList.add(in); + } + nLine++; + in.extendB(); + continue; + + case '\\': // Matches "\ No newline at end of file" + continue; + + default: + break SCAN; } - oLine++; - in.extendA(); - continue; - - case '+': - if (in == null) { - in = new Edit(oLine - 1, nLine - 1); - r.add(in); - } - nLine++; - in.extendB(); - continue; - - case '\\': // Matches "\ No newline at end of file" - continue; - - default: - break SCAN; } } - return r; + return editList; } void parseHeader() { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/patch/Patch.java b/org.eclipse.jgit/src/org/eclipse/jgit/patch/Patch.java index ce006dadbf..cf42b665ae 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/patch/Patch.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/patch/Patch.java @@ -60,7 +60,7 @@ import org.eclipse.jgit.util.TemporaryBuffer; /** A parsed collection of {@link FileHeader}s from a unified diff patch file */ public class Patch { - private static final byte[] DIFF_GIT = encodeASCII("diff --git "); + static final byte[] DIFF_GIT = encodeASCII("diff --git "); private static final byte[] DIFF_CC = encodeASCII("diff --cc "); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java new file mode 100644 index 0000000000..9536206174 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.revwalk; + +import java.io.IOException; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.treewalk.TreeWalk; +import org.eclipse.jgit.treewalk.filter.PathFilter; +import org.eclipse.jgit.treewalk.filter.TreeFilter; + +/** + * Updates the internal path filter to follow copy/renames. + * <p> + * This is a special filter that performs {@code AND(path, ANY_DIFF)}, but also + * triggers rename detection so that the path node is updated to include a prior + * file name as the RevWalk traverses history. + * <p> + * Results with this filter are unpredictable if the path being followed is a + * subdirectory. + */ +public class FollowFilter extends TreeFilter { + /** + * Create a new tree filter for a user supplied path. + * <p> + * Path strings are relative to the root of the repository. If the user's + * input should be assumed relative to a subdirectory of the repository the + * caller must prepend the subdirectory's path prior to creating the filter. + * <p> + * Path strings use '/' to delimit directories on all platforms. + * + * @param path + * the path to filter on. Must not be the empty string. All + * trailing '/' characters will be trimmed before string's length + * is checked or is used as part of the constructed filter. + * @return a new filter for the requested path. + * @throws IllegalArgumentException + * the path supplied was the empty string. + */ + public static FollowFilter create(String path) { + return new FollowFilter(PathFilter.create(path)); + } + + private final PathFilter path; + + FollowFilter(final PathFilter path) { + this.path = path; + } + + /** @return the path this filter matches. */ + public String getPath() { + return path.getPath(); + } + + @Override + public boolean include(final TreeWalk walker) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + return path.include(walker) && ANY_DIFF.include(walker); + } + + @Override + public boolean shouldBeRecursive() { + return path.shouldBeRecursive() || ANY_DIFF.shouldBeRecursive(); + } + + @Override + public TreeFilter clone() { + return new FollowFilter(path.clone()); + } + + @Override + public String toString() { + return "(FOLLOW(" + path.toString() + ")" // + + " AND " // + + ANY_DIFF.toString() + ")"; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RewriteTreeFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RewriteTreeFilter.java index d7e5c8028f..4c5a2a77eb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RewriteTreeFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RewriteTreeFilter.java @@ -44,11 +44,17 @@ package org.eclipse.jgit.revwalk; import java.io.IOException; +import java.util.List; +import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.diff.RenameDetector; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.StopWalkException; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.filter.RevFilter; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.filter.TreeFilter; @@ -76,8 +82,11 @@ class RewriteTreeFilter extends RevFilter { private final TreeWalk pathFilter; + private final Repository repo; + RewriteTreeFilter(final RevWalk walker, final TreeFilter t) { - pathFilter = new TreeWalk(walker.db); + repo = walker.db; + pathFilter = new TreeWalk(repo); pathFilter.setFilter(t); pathFilter.setRecursive(t.shouldBeRecursive()); } @@ -128,6 +137,13 @@ class RewriteTreeFilter extends RevFilter { // We have interesting items, but neither of the special // cases denoted above. // + if (adds > 0 && tw.getFilter() instanceof FollowFilter) { + // One of the paths we care about was added in this + // commit. We need to update our filter to its older + // name, if we can discover it. Find out what that is. + // + updateFollowFilter(trees); + } return true; } } else if (nParents == 0) { @@ -213,4 +229,32 @@ class RewriteTreeFilter extends RevFilter { c.flags |= REWRITE; return false; } + + private void updateFollowFilter(ObjectId[] trees) + throws MissingObjectException, IncorrectObjectTypeException, + CorruptObjectException, IOException { + TreeWalk tw = pathFilter; + FollowFilter oldFilter = (FollowFilter) tw.getFilter(); + tw.setFilter(TreeFilter.ANY_DIFF); + tw.reset(trees); + + List<DiffEntry> files = DiffEntry.scan(tw); + RenameDetector rd = new RenameDetector(repo); + rd.addAll(files); + files = rd.compute(); + + TreeFilter newFilter = oldFilter; + for (DiffEntry ent : files) { + if (isRename(ent) && ent.getNewName().equals(oldFilter.getPath())) { + newFilter = FollowFilter.create(ent.getOldName()); + break; + } + } + tw.setFilter(newFilter); + } + + private static boolean isRename(DiffEntry ent) { + return ent.getChangeType() == ChangeType.RENAME + || ent.getChangeType() == ChangeType.COPY; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java index e317789840..3f83114fd5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java @@ -90,6 +90,11 @@ public class PathFilter extends TreeFilter { pathRaw = Constants.encode(pathStr); } + /** @return the path this filter matches. */ + public String getPath() { + return pathStr; + } + @Override public boolean include(final TreeWalk walker) { return walker.isPathPrefix(pathRaw, pathRaw.length) == 0; @@ -104,7 +109,7 @@ public class PathFilter extends TreeFilter { } @Override - public TreeFilter clone() { + public PathFilter clone() { return this; } |