Getting attributes of files on Windows is an expensive operation. Windows stores file attributes in the directory, so they are basically available "for free" when a directory is listed. The implementation of Java's Files.walkFileTree() takes advantage of that (at least in the OpenJDK implementation for Windows) and provides the attributes from the directory to a FileVisitor. Using Files.walkFileTree() with a maximum depth of 1 is thus a good approach on Windows to get both the file names and the attributes in one go. In my tests, this gives a significant speed-up of FileTreeIterator over the "normal" way: using File.listFiles() and then reading the attributes of each file individually. The speed-up is hard to quantify exactly, but in my tests I've observed consistently 30-40% for staging 500 files one after another, each individually, and up to 50% for individual TreeWalks with a FileTreeIterator. On Unix, this technique is detrimental. Unix stores file attributes differently, and getting attributes of individual files is not costly. On Unix, the old way of doing a listFiles() and getting individual attributes (both native operations) is about three times faster than using walkFileTree, which is implemented in Java. Therefore, move the operation to FS/FS_Win32 and call it from FileTreeIterator, so that we can have different implementations depending on the file system. A little performance test program is included as a JUnit test (to be run manually). While this does speed up things on Windows, it doesn't solve the basic problem of bug 532300: the iterator always gets the full directory listing and the attributes of all files, and the more files there are the longer that takes. Bug: 532300 Change-Id: Ic5facb871c725256c2324b0d97b95e6efc33282a Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>tags/v5.0.0.201805151920-m7
@@ -0,0 +1,93 @@ | |||
/* | |||
* Copyright (C) 2018, Thomas Wolf <thomas.wolf@paranor.ch> | |||
* and other copyright owners as documented in the project's IP log. | |||
* | |||
* This program and the accompanying materials are made available | |||
* under the terms of the Eclipse Distribution License v1.0 which | |||
* accompanies this distribution, is reproduced below, and is | |||
* available at http://www.eclipse.org/org/documents/edl-v10.php | |||
* | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or | |||
* without modification, are permitted provided that the following | |||
* conditions are met: | |||
* | |||
* - Redistributions of source code must retain the above copyright | |||
* notice, this list of conditions and the following disclaimer. | |||
* | |||
* - Redistributions in binary form must reproduce the above | |||
* copyright notice, this list of conditions and the following | |||
* disclaimer in the documentation and/or other materials provided | |||
* with the distribution. | |||
* | |||
* - Neither the name of the Eclipse Foundation, Inc. nor the | |||
* names of its contributors may be used to endorse or promote | |||
* products derived from this software without specific prior | |||
* written permission. | |||
* | |||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND | |||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, | |||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | |||
package org.eclipse.jgit.treewalk; | |||
import static org.junit.Assert.fail; | |||
import org.eclipse.jgit.api.Git; | |||
import org.eclipse.jgit.junit.RepositoryTestCase; | |||
import org.eclipse.jgit.treewalk.filter.PathFilter; | |||
import org.junit.Test; | |||
/** | |||
* Simple performance test for git add / FileTreeIterator. | |||
*/ | |||
public class FileTreeIteratorPerformanceTest extends RepositoryTestCase { | |||
private static int N_OF_FILES = 501; | |||
@Test | |||
public void testPerformance() throws Exception { | |||
try (Git git = new Git(db)) { | |||
long times[] = new long[N_OF_FILES]; | |||
long sum = 0; | |||
String lastName = null; | |||
for (int i = 0; i < N_OF_FILES; i++) { | |||
lastName = "a" + (i + 100000) + ".txt"; | |||
writeTrashFile(lastName, ""); | |||
long start = System.nanoTime(); | |||
git.add().addFilepattern(lastName).call(); | |||
long elapsed = System.nanoTime() - start; | |||
times[i] = elapsed; | |||
sum += elapsed; | |||
} | |||
System.out.println("Total (µs) " + sum / 1000.0); | |||
for (int i = 0; i < times.length; i++) { | |||
System.out | |||
.println("Time " + i + " (µs) = " + times[i] / 1000.0); | |||
} | |||
FileTreeIterator iter = new FileTreeIterator(db); | |||
try (TreeWalk walk = new TreeWalk(db)) { | |||
walk.setFilter(PathFilter.create(lastName)); | |||
walk.addTree(iter); | |||
long start = System.nanoTime(); | |||
if (walk.next()) { | |||
System.out.println("Walk time (µs) = " | |||
+ (System.nanoTime() - start) / 1000.0); | |||
} else { | |||
fail("File not found"); | |||
} | |||
} | |||
} | |||
} | |||
} |
@@ -211,13 +211,7 @@ public class FileTreeIterator extends WorkingTreeIterator { | |||
} | |||
private Entry[] entries() { | |||
final File[] all = directory.listFiles(); | |||
if (all == null) | |||
return EOF; | |||
final Entry[] r = new Entry[all.length]; | |||
for (int i = 0; i < r.length; i++) | |||
r[i] = new FileEntry(all[i], fs, fileModeStrategy); | |||
return r; | |||
return fs.list(directory, fileModeStrategy); | |||
} | |||
/** | |||
@@ -246,7 +240,7 @@ public class FileTreeIterator extends WorkingTreeIterator { | |||
* | |||
* @since 4.3 | |||
*/ | |||
static public class DefaultFileModeStrategy implements FileModeStrategy { | |||
public static class DefaultFileModeStrategy implements FileModeStrategy { | |||
/** | |||
* a singleton instance of the default FileModeStrategy | |||
*/ | |||
@@ -279,7 +273,7 @@ public class FileTreeIterator extends WorkingTreeIterator { | |||
* | |||
* @since 4.3 | |||
*/ | |||
static public class NoGitlinksStrategy implements FileModeStrategy { | |||
public static class NoGitlinksStrategy implements FileModeStrategy { | |||
/** | |||
* a singleton instance of the default FileModeStrategy | |||
@@ -304,7 +298,7 @@ public class FileTreeIterator extends WorkingTreeIterator { | |||
/** | |||
* Wrapper for a standard Java IO file | |||
*/ | |||
static public class FileEntry extends Entry { | |||
public static class FileEntry extends Entry { | |||
private final FileMode mode; | |||
private FS.Attributes attributes; | |||
@@ -343,6 +337,29 @@ public class FileTreeIterator extends WorkingTreeIterator { | |||
mode = fileModeStrategy.getMode(f, attributes); | |||
} | |||
/** | |||
* Create a new file entry given the specified FileModeStrategy | |||
* | |||
* @param f | |||
* file | |||
* @param fs | |||
* file system | |||
* @param attributes | |||
* of the file | |||
* @param fileModeStrategy | |||
* the strategy to use when determining the FileMode of a | |||
* file; controls gitlinks etc. | |||
* | |||
* @since 5.0 | |||
*/ | |||
public FileEntry(File f, FS fs, FS.Attributes attributes, | |||
FileModeStrategy fileModeStrategy) { | |||
this.fs = fs; | |||
this.attributes = attributes; | |||
f = fs.normalize(f); | |||
mode = fileModeStrategy.getMode(f, attributes); | |||
} | |||
@Override | |||
public FileMode getMode() { | |||
return mode; | |||
@@ -365,12 +382,12 @@ public class FileTreeIterator extends WorkingTreeIterator { | |||
@Override | |||
public InputStream openInputStream() throws IOException { | |||
if (fs.isSymLink(getFile())) | |||
if (attributes.isSymbolicLink()) { | |||
return new ByteArrayInputStream(fs.readSymLink(getFile()) | |||
.getBytes( | |||
Constants.CHARACTER_ENCODING)); | |||
else | |||
.getBytes(Constants.CHARACTER_ENCODING)); | |||
} else { | |||
return new FileInputStream(getFile()); | |||
} | |||
} | |||
/** |
@@ -1165,8 +1165,12 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { | |||
return contentDigest.digest(); | |||
} | |||
/** A single entry within a working directory tree. */ | |||
protected static abstract class Entry { | |||
/** | |||
* A single entry within a working directory tree. | |||
* | |||
* @since 5.0 | |||
*/ | |||
public static abstract class Entry { | |||
byte[] encodedName; | |||
int encodedNameLen; |
@@ -71,6 +71,9 @@ import org.eclipse.jgit.errors.CommandFailedException; | |||
import org.eclipse.jgit.internal.JGitText; | |||
import org.eclipse.jgit.lib.Constants; | |||
import org.eclipse.jgit.lib.Repository; | |||
import org.eclipse.jgit.treewalk.FileTreeIterator.FileEntry; | |||
import org.eclipse.jgit.treewalk.FileTreeIterator.FileModeStrategy; | |||
import org.eclipse.jgit.treewalk.WorkingTreeIterator.Entry; | |||
import org.eclipse.jgit.util.ProcessResult.Status; | |||
import org.slf4j.Logger; | |||
import org.slf4j.LoggerFactory; | |||
@@ -81,6 +84,14 @@ import org.slf4j.LoggerFactory; | |||
public abstract class FS { | |||
private static final Logger LOG = LoggerFactory.getLogger(FS.class); | |||
/** | |||
* An empty array of entries, suitable as a return value for | |||
* {@link #list(File, FileModeStrategy)}. | |||
* | |||
* @since 5.0 | |||
*/ | |||
protected static final Entry[] NO_ENTRIES = {}; | |||
/** | |||
* This class creates FS instances. It will be overridden by a Java7 variant | |||
* if such can be detected in {@link #detect(Boolean)}. | |||
@@ -886,6 +897,29 @@ public abstract class FS { | |||
return FileUtils.relativizePath(base, other, File.separator, this.isCaseSensitive()); | |||
} | |||
/** | |||
* Enumerates children of a directory. | |||
* | |||
* @param directory | |||
* to get the children of | |||
* @param fileModeStrategy | |||
* to use to calculate the git mode of a child | |||
* @return an array of entries for the children | |||
* | |||
* @since 5.0 | |||
*/ | |||
public Entry[] list(File directory, FileModeStrategy fileModeStrategy) { | |||
final File[] all = directory.listFiles(); | |||
if (all == null) { | |||
return NO_ENTRIES; | |||
} | |||
final Entry[] result = new Entry[all.length]; | |||
for (int i = 0; i < result.length; i++) { | |||
result[i] = new FileEntry(all[i], this, fileModeStrategy); | |||
} | |||
return result; | |||
} | |||
/** | |||
* Checks whether the given hook is defined for the given repository, then | |||
* runs it with the given arguments. |
@@ -47,11 +47,21 @@ package org.eclipse.jgit.util; | |||
import java.io.File; | |||
import java.io.IOException; | |||
import java.nio.charset.Charset; | |||
import java.nio.file.FileVisitOption; | |||
import java.nio.file.FileVisitResult; | |||
import java.nio.file.Files; | |||
import java.nio.file.Path; | |||
import java.nio.file.SimpleFileVisitor; | |||
import java.nio.file.attribute.BasicFileAttributes; | |||
import java.util.ArrayList; | |||
import java.util.Arrays; | |||
import java.util.EnumSet; | |||
import java.util.List; | |||
import org.eclipse.jgit.errors.CommandFailedException; | |||
import org.eclipse.jgit.treewalk.FileTreeIterator.FileEntry; | |||
import org.eclipse.jgit.treewalk.FileTreeIterator.FileModeStrategy; | |||
import org.eclipse.jgit.treewalk.WorkingTreeIterator.Entry; | |||
import org.slf4j.Logger; | |||
import org.slf4j.LoggerFactory; | |||
@@ -119,6 +129,49 @@ public class FS_Win32 extends FS { | |||
return true; | |||
} | |||
/** {@inheritDoc} */ | |||
@Override | |||
public Entry[] list(File directory, FileModeStrategy fileModeStrategy) { | |||
List<Entry> result = new ArrayList<>(); | |||
FS fs = this; | |||
boolean checkExecutable = fs.supportsExecute(); | |||
try { | |||
Files.walkFileTree(directory.toPath(), | |||
EnumSet.noneOf(FileVisitOption.class), 1, | |||
new SimpleFileVisitor<Path>() { | |||
@Override | |||
public FileVisitResult visitFile(Path file, | |||
BasicFileAttributes attrs) throws IOException { | |||
File f = file.toFile(); | |||
FS.Attributes attributes = new FS.Attributes(fs, f, | |||
true, attrs.isDirectory(), | |||
checkExecutable && f.canExecute(), | |||
attrs.isSymbolicLink(), | |||
attrs.isRegularFile(), | |||
attrs.creationTime().toMillis(), | |||
attrs.lastModifiedTime().toMillis(), | |||
attrs.size()); | |||
result.add(new FileEntry(f, fs, attributes, | |||
fileModeStrategy)); | |||
return FileVisitResult.CONTINUE; | |||
} | |||
@Override | |||
public FileVisitResult visitFileFailed(Path file, | |||
IOException exc) throws IOException { | |||
// Just ignore it | |||
return FileVisitResult.CONTINUE; | |||
} | |||
}); | |||
} catch (IOException e) { | |||
// Ignore | |||
} | |||
if (result.isEmpty()) { | |||
return NO_ENTRIES; | |||
} | |||
return result.toArray(new Entry[result.size()]); | |||
} | |||
/** {@inheritDoc} */ | |||
@Override | |||
protected File discoverGitExe() { |