From: Zhen Chen Date: Thu, 8 Jun 2017 23:55:26 +0000 (-0700) Subject: Defer object collision check until pack stream is done X-Git-Tag: v4.9.0.201710071750-r~168 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=9a3e037726ab7a65bb4ec439c32fee962e105882;p=jgit.git Defer object collision check until pack stream is done Object collision check requires read from local storage which may be slow. We already delay this check for blobs, this change will also delay other objects until the pack stream is closed. In this way, there is no readCurs call until the pack stream is closed. Change-Id: I3c8c4720dd19a5f64f8c7ddf07d815ed6877b6aa Signed-off-by: Zhen Chen --- diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java index c82b3891b5..259b7bb14e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java @@ -173,8 +173,8 @@ public abstract class PackParser { private LongMap baseByPos; - /** Blobs whose contents need to be double-checked after indexing. */ - private BlockList deferredCheckBlobs; + /** Objects need to be double-checked for collision after indexing. */ + private BlockList collisionCheckObjs; private MessageDigest packDigest; @@ -528,7 +528,7 @@ public abstract class PackParser { entries = new PackedObjectInfo[(int) objectCount]; baseById = new ObjectIdOwnerMap<>(); baseByPos = new LongMap<>(); - deferredCheckBlobs = new BlockList<>(); + collisionCheckObjs = new BlockList<>(); receiving.beginTask(JGitText.get().receivingObjects, (int) objectCount); @@ -545,8 +545,10 @@ public abstract class PackParser { receiving.endTask(); } - if (!deferredCheckBlobs.isEmpty()) - doDeferredCheckBlobs(); + if (!collisionCheckObjs.isEmpty()) { + checkObjectCollision(); + } + if (deltaCount > 0) { if (resolving instanceof BatchingProgressMonitor) { ((BatchingProgressMonitor) resolving).setDelayStart( @@ -675,6 +677,9 @@ public abstract class PackParser { objectDigest.digest(tempObjectId); verifySafeObject(tempObjectId, type, visit.data); + if (isCheckObjectCollisions() && readCurs.has(tempObjectId)) { + checkObjectCollision(tempObjectId, type, visit.data); + } PackedObjectInfo oe; oe = newInfo(tempObjectId, visit.delta, visit.parent.id); @@ -1031,7 +1036,6 @@ public abstract class PackParser { objectDigest.update((byte) 0); final byte[] data; - boolean checkContentLater = false; if (type == Constants.OBJ_BLOB) { byte[] readBuffer = buffer(); InputStream inf = inflate(Source.INPUT, sz); @@ -1045,10 +1049,7 @@ public abstract class PackParser { } inf.close(); objectDigest.digest(tempObjectId); - checkContentLater = isCheckObjectCollisions() - && readCurs.has(tempObjectId); data = null; - } else { data = inflateAndReturn(Source.INPUT, sz); objectDigest.update(data); @@ -1062,8 +1063,10 @@ public abstract class PackParser { if (data != null) onInflatedObjectData(obj, type, data); addObjectAndTrack(obj); - if (checkContentLater) - deferredCheckBlobs.add(obj); + + if (isCheckObjectCollisions()) { + collisionCheckObjs.add(obj); + } } private void verifySafeObject(final AnyObjectId id, final int type, @@ -1078,65 +1081,73 @@ public abstract class PackParser { throw new CorruptObjectException(MessageFormat.format( JGitText.get().invalidObject, Constants.typeString(type), - readCurs.abbreviate(id, 10).name(), + id.name(), e.getMessage()), e); } } + } - if (isCheckObjectCollisions()) { - try { - final ObjectLoader ldr = readCurs.open(id, type); - final byte[] existingData = ldr.getCachedBytes(data.length); - if (!Arrays.equals(data, existingData)) { - throw new IOException(MessageFormat.format( - JGitText.get().collisionOn, id.name())); - } - } catch (MissingObjectException notLocal) { - // This is OK, we don't have a copy of the object locally - // but the API throws when we try to read it as usually its - // an error to read something that doesn't exist. + private void checkObjectCollision() throws IOException { + for (PackedObjectInfo obj : collisionCheckObjs) { + if (!readCurs.has(obj)) { + continue; } + checkObjectCollision(obj); } } - private void doDeferredCheckBlobs() throws IOException { + private void checkObjectCollision(PackedObjectInfo obj) + throws IOException { + ObjectTypeAndSize info = openDatabase(obj, new ObjectTypeAndSize()); final byte[] readBuffer = buffer(); final byte[] curBuffer = new byte[readBuffer.length]; - ObjectTypeAndSize info = new ObjectTypeAndSize(); - - for (PackedObjectInfo obj : deferredCheckBlobs) { - info = openDatabase(obj, info); - - if (info.type != Constants.OBJ_BLOB) + long sz = info.size; + InputStream pck = null; + try (ObjectStream cur = readCurs.open(obj, info.type).openStream()) { + if (cur.getSize() != sz) { throw new IOException(MessageFormat.format( - JGitText.get().unknownObjectType, - Integer.valueOf(info.type))); - - ObjectStream cur = readCurs.open(obj, info.type).openStream(); - try { - long sz = info.size; - if (cur.getSize() != sz) - throw new IOException(MessageFormat.format( - JGitText.get().collisionOn, obj.name())); - InputStream pck = inflate(Source.DATABASE, sz); - while (0 < sz) { - int n = (int) Math.min(readBuffer.length, sz); - IO.readFully(cur, curBuffer, 0, n); - IO.readFully(pck, readBuffer, 0, n); - for (int i = 0; i < n; i++) { - if (curBuffer[i] != readBuffer[i]) - throw new IOException(MessageFormat.format(JGitText - .get().collisionOn, obj.name())); + JGitText.get().collisionOn, obj.name())); + } + pck = inflate(Source.DATABASE, sz); + while (0 < sz) { + int n = (int) Math.min(readBuffer.length, sz); + IO.readFully(cur, curBuffer, 0, n); + IO.readFully(pck, readBuffer, 0, n); + for (int i = 0; i < n; i++) { + if (curBuffer[i] != readBuffer[i]) { + throw new IOException(MessageFormat.format(JGitText + .get().collisionOn, obj.name())); } - sz -= n; } + sz -= n; + } + } catch (MissingObjectException notLocal) { + // This is OK, we don't have a copy of the object locally + // but the API throws when we try to read it as usually its + // an error to read something that doesn't exist. + } finally { + if (pck != null) { pck.close(); - } finally { - cur.close(); } } } + private void checkObjectCollision(AnyObjectId obj, int type, byte[] data) + throws IOException { + try { + final ObjectLoader ldr = readCurs.open(obj, type); + final byte[] existingData = ldr.getCachedBytes(data.length); + if (!Arrays.equals(data, existingData)) { + throw new IOException(MessageFormat.format( + JGitText.get().collisionOn, obj.name())); + } + } catch (MissingObjectException notLocal) { + // This is OK, we don't have a copy of the object locally + // but the API throws when we try to read it as usually its + // an error to read something that doesn't exist. + } + } + /** @return current position of the input stream being parsed. */ private long streamPosition() { return bBase + bOffset;