From b0eb744604391c94ec505f846604df1a07a166b1 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Sun, 29 Nov 2015 12:04:03 -0800 Subject: Delay locating .gitattributes until requested Instead of checking every entry for .gitattributes only look for the entry on request by TreeWalk. This avoids impacting uses like RevWalk filtering history. When the attrs is requested skip to the start of the tree and look for .gitattributes until either it is found, or it is impossible to be present. Due to the sorting rules of tree entries .gitattributes should be among the first or second entries in the tree so very few entries will need to be considered. Waiting to find the .gitattributes file by native ordering may miss attrs for files like .config, which sorts before .gitattributes. Starting from the front of the tree on demand ensures the attributes are parsed as early as necessary to process any entry in the tree. Due to TreeWalk recursively processing up the tree of iterators we cannot just reset the current CanonicalTreeParser to the start as parent parsers share the same path buffer as their children. Resetting a parent to look for .gitattributes may overwrite path buffer data used by a child iterator. Work around this by building a new temporary CanonicalTreeParser instance. Change-Id: Ife950253b687be325340d27e9915c9a40df2641c --- .../jgit/treewalk/CanonicalTreeParserTest.java | 41 +++++++++++ .../jgit/treewalk/AbstractTreeIterator.java | 36 ++++++++++ .../eclipse/jgit/treewalk/CanonicalTreeParser.java | 80 ++++++++++------------ 3 files changed, 114 insertions(+), 43 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/CanonicalTreeParserTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/CanonicalTreeParserTest.java index 52da69ef52..f5e97c2dc2 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/CanonicalTreeParserTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/CanonicalTreeParserTest.java @@ -43,6 +43,8 @@ package org.eclipse.jgit.treewalk; +import static org.eclipse.jgit.lib.FileMode.REGULAR_FILE; +import static org.eclipse.jgit.lib.FileMode.SYMLINK; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertSame; @@ -50,9 +52,11 @@ import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; +import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.TreeFormatter; import org.eclipse.jgit.util.RawParseUtils; import org.junit.Before; import org.junit.Test; @@ -369,4 +373,41 @@ public class CanonicalTreeParserTest { assertEquals(name, RawParseUtils.decode(Constants.CHARSET, ctp.path, ctp.pathOffset, ctp.pathLen)); } + + @Test + public void testFindAttributesWhenFirst() throws CorruptObjectException { + TreeFormatter tree = new TreeFormatter(); + tree.append(".gitattributes", REGULAR_FILE, hash_a); + ctp.reset(tree.toByteArray()); + + assertTrue(ctp.findFile(".gitattributes")); + assertEquals(REGULAR_FILE.getBits(), ctp.getEntryRawMode()); + assertEquals(".gitattributes", ctp.getEntryPathString()); + assertEquals(hash_a, ctp.getEntryObjectId()); + } + + @Test + public void testFindAttributesWhenSecond() throws CorruptObjectException { + TreeFormatter tree = new TreeFormatter(); + tree.append(".config", SYMLINK, hash_a); + tree.append(".gitattributes", REGULAR_FILE, hash_foo); + ctp.reset(tree.toByteArray()); + + assertTrue(ctp.findFile(".gitattributes")); + assertEquals(REGULAR_FILE.getBits(), ctp.getEntryRawMode()); + assertEquals(".gitattributes", ctp.getEntryPathString()); + assertEquals(hash_foo, ctp.getEntryObjectId()); + } + + @Test + public void testFindAttributesWhenMissing() throws CorruptObjectException { + TreeFormatter tree = new TreeFormatter(); + tree.append("src", REGULAR_FILE, hash_a); + tree.append("zoo", REGULAR_FILE, hash_foo); + ctp.reset(tree.toByteArray()); + + assertFalse(ctp.findFile(".gitattributes")); + assertEquals(11, ctp.idOffset()); // Did not walk the entire tree. + assertEquals("src", ctp.getEntryPathString()); + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java index aa5f32a870..5e71889574 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java @@ -327,6 +327,42 @@ public abstract class AbstractTreeIterator { return pathCompare(p.path, cPos, p.pathLen, pMode, cPos); } + /** + * Seek the iterator on a file, if present. + * + * @param name + * file name to find (will not find a directory). + * @return true if the file exists in this tree; false otherwise. + * @throws CorruptObjectException + * tree is invalid. + * @since 4.2 + */ + public boolean findFile(String name) throws CorruptObjectException { + return findFile(Constants.encode(name)); + } + + /** + * Seek the iterator on a file, if present. + * + * @param name + * file name to find (will not find a directory). + * @return true if the file exists in this tree; false otherwise. + * @throws CorruptObjectException + * tree is invalid. + * @since 4.2 + */ + public boolean findFile(byte[] name) throws CorruptObjectException { + for (; !eof(); next(1)) { + int cmp = pathCompare(name, 0, name.length, 0, pathOffset); + if (cmp == 0) { + return true; + } else if (cmp > 0) { + return false; + } + } + return false; + } + /** * Compare the path of this current entry to a raw buffer. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/CanonicalTreeParser.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/CanonicalTreeParser.java index c24efe20aa..c038f07725 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/CanonicalTreeParser.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/CanonicalTreeParser.java @@ -44,6 +44,13 @@ package org.eclipse.jgit.treewalk; +import static org.eclipse.jgit.lib.Constants.DOT_GIT_ATTRIBUTES; +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; +import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; +import static org.eclipse.jgit.lib.Constants.OBJ_TREE; +import static org.eclipse.jgit.lib.Constants.TYPE_TREE; +import static org.eclipse.jgit.lib.Constants.encode; + import java.io.IOException; import java.io.InputStream; import java.util.Arrays; @@ -54,20 +61,15 @@ import org.eclipse.jgit.attributes.AttributesRule; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.AnyObjectId; -import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.MutableObjectId; import org.eclipse.jgit.lib.ObjectId; -import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ObjectReader; -import org.eclipse.jgit.util.RawParseUtils; /** Parses raw Git trees from the canonical semi-text/semi-binary format. */ public class CanonicalTreeParser extends AbstractTreeIterator { private static final byte[] EMPTY = {}; - - private static final byte[] ATTRS = Constants - .encode(Constants.DOT_GIT_ATTRIBUTES); + private static final byte[] ATTRS = encode(DOT_GIT_ATTRIBUTES); private byte[] raw; @@ -133,6 +135,7 @@ public class CanonicalTreeParser extends AbstractTreeIterator { * the raw tree content. */ public void reset(final byte[] treeData) { + attributesNode = null; raw = treeData; prevPtr = -1; currPtr = 0; @@ -208,7 +211,7 @@ public class CanonicalTreeParser extends AbstractTreeIterator { */ public void reset(final ObjectReader reader, final AnyObjectId id) throws IncorrectObjectTypeException, IOException { - reset(reader.open(id, Constants.OBJ_TREE).getCachedBytes()); + reset(reader.open(id, OBJ_TREE).getCachedBytes()); } @Override @@ -218,7 +221,7 @@ public class CanonicalTreeParser extends AbstractTreeIterator { idBuffer.fromRaw(idBuffer(), idOffset()); if (!FileMode.TREE.equals(mode)) { final ObjectId me = idBuffer.toObjectId(); - throw new IncorrectObjectTypeException(me, Constants.TYPE_TREE); + throw new IncorrectObjectTypeException(me, TYPE_TREE); } return createSubtreeIterator0(reader, idBuffer); } @@ -263,7 +266,7 @@ public class CanonicalTreeParser extends AbstractTreeIterator { @Override public int idOffset() { - return nextPtr - Constants.OBJECT_ID_LENGTH; + return nextPtr - OBJECT_ID_LENGTH; } @Override @@ -301,7 +304,7 @@ public class CanonicalTreeParser extends AbstractTreeIterator { prevPtr = ptr; while (raw[ptr] != 0) ptr++; - ptr += Constants.OBJECT_ID_LENGTH + 1; + ptr += OBJECT_ID_LENGTH + 1; } if (delta != 0) throw new ArrayIndexOutOfBoundsException(delta); @@ -337,7 +340,7 @@ public class CanonicalTreeParser extends AbstractTreeIterator { trace[delta] = ptr; while (raw[ptr] != 0) ptr++; - ptr += Constants.OBJECT_ID_LENGTH + 1; + ptr += OBJECT_ID_LENGTH + 1; } if (trace[1] == -1) throw new ArrayIndexOutOfBoundsException(delta); @@ -372,12 +375,7 @@ public class CanonicalTreeParser extends AbstractTreeIterator { } } pathLen = tmp; - nextPtr = ptr + Constants.OBJECT_ID_LENGTH; - - // Check if this entry is a .gitattributes file - if (path[pathOffset] == '.' - && RawParseUtils.match(path, pathOffset, ATTRS) > 0) - attributesNode = new LazyLoadingAttributesNode(idOffset()); + nextPtr = ptr + OBJECT_ID_LENGTH; } /** @@ -391,36 +389,32 @@ public class CanonicalTreeParser extends AbstractTreeIterator { */ public AttributesNode getEntryAttributesNode(ObjectReader reader) throws IOException { - if (attributesNode instanceof LazyLoadingAttributesNode) - attributesNode = ((LazyLoadingAttributesNode) attributesNode) - .load(reader); - return attributesNode; + if (attributesNode == null) { + attributesNode = findAttributes(reader); + } + return attributesNode.getRules().isEmpty() ? null : attributesNode; } - /** - * {@link AttributesNode} implementation that provides lazy loading - */ - private class LazyLoadingAttributesNode extends AttributesNode { - private final int idOffset; - - LazyLoadingAttributesNode(int idOffset) { - super(Collections. emptyList()); - this.idOffset = idOffset; + private AttributesNode findAttributes(ObjectReader reader) + throws IOException { + CanonicalTreeParser itr = new CanonicalTreeParser(); + itr.reset(raw); + if (itr.findFile(ATTRS)) { + return loadAttributes(reader, itr.getEntryObjectId()); } + return noAttributes(); + } - AttributesNode load(ObjectReader reader) throws IOException { - AttributesNode r = new AttributesNode(); - ObjectId id = ObjectId.fromRaw(raw, idOffset); - ObjectLoader loader = reader.open(id); - if (loader != null) { - InputStream in = loader.openStream(); - try { - r.parse(in); - } finally { - in.close(); - } - } - return r.getRules().isEmpty() ? null : r; + private static AttributesNode loadAttributes(ObjectReader reader, + AnyObjectId id) throws IOException { + AttributesNode r = new AttributesNode(); + try (InputStream in = reader.open(id, OBJ_BLOB).openStream()) { + r.parse(in); } + return r.getRules().isEmpty() ? noAttributes() : r; + } + + private static AttributesNode noAttributes() { + return new AttributesNode(Collections. emptyList()); } } -- cgit v1.2.3 From 2d011cd648ce33bc6eee577c380a178cb22bfe54 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Sat, 28 Nov 2015 09:23:59 -0800 Subject: DirCacheBuilder: Speed up reading from trees Recursively copying a tree into a DirCache is a bottleneck for some algorithms like the in memory merge code in Gerrit Code Review. Drop a layer down in the stack and use CanonicalTreeParser directly as the addition logic only processes 1 tree at a time and does not need the merge sorting feature (or overhead) of TreeWalk. Combined with 761814fe9c ("DirCacheEntry: Speed up creation by avoiding string cast") tree loading 38,900 entries nearly halves in running time from 70ms to 36ms on some platforms. Change-Id: If1490ca25de0679a71cf508f59b486f9cc816165 --- .../org/eclipse/jgit/dircache/DirCacheBuilder.java | 68 ++++++++++++++++------ .../org/eclipse/jgit/dircache/DirCacheEntry.java | 4 ++ 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheBuilder.java b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheBuilder.java index 73405cb40a..cfebe2d073 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheBuilder.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheBuilder.java @@ -44,6 +44,9 @@ package org.eclipse.jgit.dircache; +import static org.eclipse.jgit.lib.FileMode.TYPE_MASK; +import static org.eclipse.jgit.lib.FileMode.TYPE_TREE; + import java.io.IOException; import java.text.MessageFormat; import java.util.Arrays; @@ -51,9 +54,7 @@ import java.util.Arrays; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.ObjectReader; -import org.eclipse.jgit.treewalk.AbstractTreeIterator; import org.eclipse.jgit.treewalk.CanonicalTreeParser; -import org.eclipse.jgit.treewalk.TreeWalk; /** * Updates a {@link DirCache} by adding individual {@link DirCacheEntry}s. @@ -163,27 +164,56 @@ public class DirCacheBuilder extends BaseDirCacheEditor { * @throws IOException * a tree cannot be read to iterate through its entries. */ - public void addTree(final byte[] pathPrefix, final int stage, - final ObjectReader reader, final AnyObjectId tree) throws IOException { - final TreeWalk tw = new TreeWalk(reader); - tw.addTree(new CanonicalTreeParser(pathPrefix, reader, tree - .toObjectId())); - tw.setRecursive(true); - if (tw.next()) { - final DirCacheEntry newEntry = toEntry(stage, tw); - beforeAdd(newEntry); - fastAdd(newEntry); - while (tw.next()) - fastAdd(toEntry(stage, tw)); + public void addTree(byte[] pathPrefix, int stage, ObjectReader reader, + AnyObjectId tree) throws IOException { + CanonicalTreeParser p = createTreeParser(pathPrefix, reader, tree); + while (!p.eof()) { + if (isTree(p)) { + p = enterTree(p, reader); + continue; + } + + DirCacheEntry first = toEntry(stage, p); + beforeAdd(first); + fastAdd(first); + p = p.next(); + break; + } + + // Rest of tree entries are correctly sorted; use fastAdd(). + while (!p.eof()) { + if (isTree(p)) { + p = enterTree(p, reader); + } else { + fastAdd(toEntry(stage, p)); + p = p.next(); + } } } - private DirCacheEntry toEntry(final int stage, final TreeWalk tw) { - final DirCacheEntry e = new DirCacheEntry(tw.getRawPath(), stage); - final AbstractTreeIterator i; + private static CanonicalTreeParser createTreeParser(byte[] pathPrefix, + ObjectReader reader, AnyObjectId tree) throws IOException { + return new CanonicalTreeParser(pathPrefix, reader, tree); + } + + private static boolean isTree(CanonicalTreeParser p) { + return (p.getEntryRawMode() & TYPE_MASK) == TYPE_TREE; + } + + private static CanonicalTreeParser enterTree(CanonicalTreeParser p, + ObjectReader reader) throws IOException { + p = p.createSubtreeIterator(reader); + return p.eof() ? p.next() : p; + } + + private static DirCacheEntry toEntry(int stage, CanonicalTreeParser i) { + byte[] buf = i.getEntryPathBuffer(); + int len = i.getEntryPathLength(); + byte[] path = new byte[len]; + System.arraycopy(buf, 0, path, 0, len); - i = tw.getTree(0, AbstractTreeIterator.class); - e.setFileMode(tw.getFileMode(0)); + DirCacheEntry e = new DirCacheEntry(path, stage); + e.setFileMode(i.getEntryRawMode()); e.setObjectIdFromRaw(i.idBuffer(), i.idOffset()); return e; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java index 22c32ffd17..c8bc0960f4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java @@ -505,6 +505,10 @@ public class DirCacheEntry { NB.encodeInt32(info, infoOffset + P_MODE, mode.getBits()); } + void setFileMode(int mode) { + NB.encodeInt32(info, infoOffset + P_MODE, mode); + } + /** * Get the cached creation time of this file, in milliseconds. * -- cgit v1.2.3