diff options
author | Thomas Wolf <twolf@apache.org> | 2023-04-20 21:13:59 +0200 |
---|---|---|
committer | Matthias Sohn <matthias.sohn@sap.com> | 2023-04-28 17:04:47 -0400 |
commit | 3ed4cdda6b99f812258ca50bd77447a28d9d4596 (patch) | |
tree | a2e42d4924d4d157843178deb0f0741395885807 /org.eclipse.jgit | |
parent | 45de4fa2cb638a8e20fa27b3d88435a697bb9df8 (diff) | |
download | jgit-3ed4cdda6b99f812258ca50bd77447a28d9d4596.tar.gz jgit-3ed4cdda6b99f812258ca50bd77447a28d9d4596.zip |
AddCommand: ability to switch off renormalization
JGit's AddCommand always renormalizes tracked files. C git does so only
on git add --renormalize. Especially for git add . and the JGit
equivalent git.add().addFilepattern(".").call() this can make a big
difference if there are many files, or large files.
Add a "renormalize" option to AddCommand. To maintain compatibility with
existing uses, this option is "true" by default, and the behavior of
AddCommand is as it has always been in JGit.
If set to "false", AddCommand uses an IndexDiffFilter (in addition to a path
filter, if any). This excludes unchanged files (that are not racily clean) from
content checks. Note that changes in CRLF settings or in filters will be
ignored for such files if renormalize == false.
Add the "--renormalize" option to the Add command in the JGit
command-line program. For the command-line program, the default is as in C git:
renormalize is off by default and enabled only if the option is given.
Note that --renormalize implies --update in the command-line program, as
in C git. In AddCommand, the two settings are independent.
Additionally, avoid opening input streams unnecessarily in
WorkingTreeIterator.getEntryContentLength() and fix some bogus
indentation.
Add a simple test that adds 1000 files of 10kB in 10 directories twice
and that fails if the second invocation (without any changes) with
renormalize=false is not significantly faster.
Locally, I observe for that second invocation:
* git.add().addFilepattern(".").call() ~660ms
* git.add().addFilepattern(".").setRenormalize(false).call() ~16ms
Bug: 494323
Change-Id: I30f9d518563fa55d7058a48c27c425f3b60aeb4c
Signed-off-by: Thomas Wolf <twolf@apache.org>
Diffstat (limited to 'org.eclipse.jgit')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/api/AddCommand.java | 58 | ||||
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java | 49 |
2 files changed, 93 insertions, 14 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/api/AddCommand.java b/org.eclipse.jgit/src/org/eclipse/jgit/api/AddCommand.java index ae75d466de..cb32324043 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/api/AddCommand.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/api/AddCommand.java @@ -39,7 +39,10 @@ import org.eclipse.jgit.treewalk.FileTreeIterator; import org.eclipse.jgit.treewalk.NameConflictTreeWalk; import org.eclipse.jgit.treewalk.TreeWalk.OperationType; import org.eclipse.jgit.treewalk.WorkingTreeIterator; +import org.eclipse.jgit.treewalk.filter.AndTreeFilter; +import org.eclipse.jgit.treewalk.filter.IndexDiffFilter; import org.eclipse.jgit.treewalk.filter.PathFilterGroup; +import org.eclipse.jgit.treewalk.filter.TreeFilter; /** * A class used to execute a {@code Add} command. It has setters for all @@ -58,6 +61,10 @@ public class AddCommand extends GitCommand<DirCache> { private boolean update = false; + // This defaults to true because it's what JGit has been doing + // traditionally. The C git default would be false. 
+ private boolean renormalize = true; + /** * Constructor for AddCommand * @@ -127,8 +134,20 @@ public class AddCommand extends GitCommand<DirCache> { workingTreeIterator = new FileTreeIterator(repo); workingTreeIterator.setDirCacheIterator(tw, 0); tw.addTree(workingTreeIterator); - if (!addAll) - tw.setFilter(PathFilterGroup.createFromStrings(filepatterns)); + TreeFilter pathFilter = null; + if (!addAll) { + pathFilter = PathFilterGroup.createFromStrings(filepatterns); + } + if (!renormalize) { + if (pathFilter == null) { + tw.setFilter(new IndexDiffFilter(0, 1)); + } else { + tw.setFilter(AndTreeFilter.create(new IndexDiffFilter(0, 1), + pathFilter)); + } + } else if (pathFilter != null) { + tw.setFilter(pathFilter); + } byte[] lastAdded = null; @@ -260,4 +279,39 @@ public class AddCommand extends GitCommand<DirCache> { public boolean isUpdate() { return update; } + + /** + * Defines whether the command will renormalize by re-applying the "clean" + * process to tracked files. + * <p> + * This does not automatically call {@link #setUpdate(boolean)}. + * </p> + * + * @param renormalize + * whether to renormalize tracked files + * @return {@code this} + * @since 6.6 + */ + public AddCommand setRenormalize(boolean renormalize) { + this.renormalize = renormalize; + return this; + } + + /** + * Tells whether the command will renormalize by re-applying the "clean" + * process to tracked files. + * <p> + * For legacy reasons, this is {@code true} by default. + * </p> + * <p> + * This setting is independent of {@link #isUpdate()}. In C git, + * command-line option --renormalize implies --update. 
+ * </p> + * + * @return whether files will be renormalized + * @since 6.6 + */ + public boolean isRenormalize() { + return renormalize; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java index d8a61ec97a..b5d6610d52 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java @@ -399,6 +399,35 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { } } + private long possiblyFilteredLength(Entry e, long len) throws IOException { + if (getCleanFilterCommand() == null && getEolStreamType( + OperationType.CHECKIN_OP) == EolStreamType.DIRECT) { + return len; + } + + if (len <= MAXIMUM_FILE_SIZE_TO_READ_FULLY) { + InputStream is = e.openInputStream(); + try { + ByteBuffer rawbuf = IO.readWholeStream(is, (int) len); + rawbuf = filterClean(rawbuf.array(), rawbuf.limit()); + return rawbuf.limit(); + } finally { + safeClose(is); + } + } + + if (getCleanFilterCommand() == null && isBinary(e)) { + return len; + } + + InputStream is = filterClean(e.openInputStream()); + try { + return computeLength(is); + } finally { + safeClose(is); + } + } + private InputStream possiblyFilteredInputStream(final Entry e, final InputStream is, final long len) throws IOException { @@ -417,11 +446,11 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { } if (getCleanFilterCommand() == null && isBinary(e)) { - canonLen = len; - return is; - } + canonLen = len; + return is; + } - final InputStream lenIs = filterClean(e.openInputStream()); + final InputStream lenIs = filterClean(e.openInputStream()); try { canonLen = computeLength(lenIs); } finally { @@ -595,15 +624,11 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { public long getEntryContentLength() throws IOException { if (canonLen == -1) { long rawLen = 
getEntryLength(); - if (rawLen == 0) + if (rawLen == 0) { canonLen = 0; - InputStream is = current().openInputStream(); - try { - // canonLen gets updated here - possiblyFilteredInputStream(current(), is, current() - .getLength()); - } finally { - safeClose(is); + } else { + canonLen = possiblyFilteredLength(current(), + current().getLength()); } } return canonLen; |