]> source.dussan.org Git - jgit.git/commitdiff
Defer object collision check until pack stream is done 40/98740/4
authorZhen Chen <czhen@google.com>
Thu, 8 Jun 2017 23:55:26 +0000 (16:55 -0700)
committerZhen Chen <czhen@google.com>
Fri, 9 Jun 2017 04:57:03 +0000 (21:57 -0700)
Object collision check requires read from local storage which may be
slow. We already delay this check for blobs, this change will also delay
other objects until the pack stream is closed. In this way, there is no
readCurs call until the pack stream is closed.

Change-Id: I3c8c4720dd19a5f64f8c7ddf07d815ed6877b6aa
Signed-off-by: Zhen Chen <czhen@google.com>
org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java

index c82b3891b50a2942eb2a41d28ba22dc939ab72fa..259b7bb14edfeca3666bde8a9d42cdf485ab9a59 100644 (file)
@@ -173,8 +173,8 @@ public abstract class PackParser {
 
        private LongMap<UnresolvedDelta> baseByPos;
 
-       /** Blobs whose contents need to be double-checked after indexing. */
-       private BlockList<PackedObjectInfo> deferredCheckBlobs;
+       /** Objects need to be double-checked for collision after indexing. */
+       private BlockList<PackedObjectInfo> collisionCheckObjs;
 
        private MessageDigest packDigest;
 
@@ -528,7 +528,7 @@ public abstract class PackParser {
                        entries = new PackedObjectInfo[(int) objectCount];
                        baseById = new ObjectIdOwnerMap<>();
                        baseByPos = new LongMap<>();
-                       deferredCheckBlobs = new BlockList<>();
+                       collisionCheckObjs = new BlockList<>();
 
                        receiving.beginTask(JGitText.get().receivingObjects,
                                        (int) objectCount);
@@ -545,8 +545,10 @@ public abstract class PackParser {
                                receiving.endTask();
                        }
 
-                       if (!deferredCheckBlobs.isEmpty())
-                               doDeferredCheckBlobs();
+                       if (!collisionCheckObjs.isEmpty()) {
+                               checkObjectCollision();
+                       }
+
                        if (deltaCount > 0) {
                                if (resolving instanceof BatchingProgressMonitor) {
                                        ((BatchingProgressMonitor) resolving).setDelayStart(
@@ -675,6 +677,9 @@ public abstract class PackParser {
                        objectDigest.digest(tempObjectId);
 
                        verifySafeObject(tempObjectId, type, visit.data);
+                       if (isCheckObjectCollisions() && readCurs.has(tempObjectId)) {
+                               checkObjectCollision(tempObjectId, type, visit.data);
+                       }
 
                        PackedObjectInfo oe;
                        oe = newInfo(tempObjectId, visit.delta, visit.parent.id);
@@ -1031,7 +1036,6 @@ public abstract class PackParser {
                objectDigest.update((byte) 0);
 
                final byte[] data;
-               boolean checkContentLater = false;
                if (type == Constants.OBJ_BLOB) {
                        byte[] readBuffer = buffer();
                        InputStream inf = inflate(Source.INPUT, sz);
@@ -1045,10 +1049,7 @@ public abstract class PackParser {
                        }
                        inf.close();
                        objectDigest.digest(tempObjectId);
-                       checkContentLater = isCheckObjectCollisions()
-                                       && readCurs.has(tempObjectId);
                        data = null;
-
                } else {
                        data = inflateAndReturn(Source.INPUT, sz);
                        objectDigest.update(data);
@@ -1062,8 +1063,10 @@ public abstract class PackParser {
                if (data != null)
                        onInflatedObjectData(obj, type, data);
                addObjectAndTrack(obj);
-               if (checkContentLater)
-                       deferredCheckBlobs.add(obj);
+
+               if (isCheckObjectCollisions()) {
+                       collisionCheckObjs.add(obj);
+               }
        }
 
        private void verifySafeObject(final AnyObjectId id, final int type,
@@ -1078,65 +1081,73 @@ public abstract class PackParser {
                                throw new CorruptObjectException(MessageFormat.format(
                                                JGitText.get().invalidObject,
                                                Constants.typeString(type),
-                                               readCurs.abbreviate(id, 10).name(),
+                                               id.name(),
                                                e.getMessage()), e);
                        }
                }
+       }
 
-               if (isCheckObjectCollisions()) {
-                       try {
-                               final ObjectLoader ldr = readCurs.open(id, type);
-                               final byte[] existingData = ldr.getCachedBytes(data.length);
-                               if (!Arrays.equals(data, existingData)) {
-                                       throw new IOException(MessageFormat.format(
-                                                       JGitText.get().collisionOn, id.name()));
-                               }
-                       } catch (MissingObjectException notLocal) {
-                               // This is OK, we don't have a copy of the object locally
-                               // but the API throws when we try to read it as usually its
-                               // an error to read something that doesn't exist.
+       private void checkObjectCollision() throws IOException {
+               for (PackedObjectInfo obj : collisionCheckObjs) {
+                       if (!readCurs.has(obj)) {
+                               continue;
                        }
+                       checkObjectCollision(obj);
                }
        }
 
-       private void doDeferredCheckBlobs() throws IOException {
+       private void checkObjectCollision(PackedObjectInfo obj)
+                       throws IOException {
+               ObjectTypeAndSize info = openDatabase(obj, new ObjectTypeAndSize());
                final byte[] readBuffer = buffer();
                final byte[] curBuffer = new byte[readBuffer.length];
-               ObjectTypeAndSize info = new ObjectTypeAndSize();
-
-               for (PackedObjectInfo obj : deferredCheckBlobs) {
-                       info = openDatabase(obj, info);
-
-                       if (info.type != Constants.OBJ_BLOB)
+               long sz = info.size;
+               InputStream pck = null;
+               try (ObjectStream cur = readCurs.open(obj, info.type).openStream()) {
+                       if (cur.getSize() != sz) {
                                throw new IOException(MessageFormat.format(
-                                               JGitText.get().unknownObjectType,
-                                               Integer.valueOf(info.type)));
-
-                       ObjectStream cur = readCurs.open(obj, info.type).openStream();
-                       try {
-                               long sz = info.size;
-                               if (cur.getSize() != sz)
-                                       throw new IOException(MessageFormat.format(
-                                                       JGitText.get().collisionOn, obj.name()));
-                               InputStream pck = inflate(Source.DATABASE, sz);
-                               while (0 < sz) {
-                                       int n = (int) Math.min(readBuffer.length, sz);
-                                       IO.readFully(cur, curBuffer, 0, n);
-                                       IO.readFully(pck, readBuffer, 0, n);
-                                       for (int i = 0; i < n; i++) {
-                                               if (curBuffer[i] != readBuffer[i])
-                                                       throw new IOException(MessageFormat.format(JGitText
-                                                                       .get().collisionOn, obj.name()));
+                                               JGitText.get().collisionOn, obj.name()));
+                       }
+                       pck = inflate(Source.DATABASE, sz);
+                       while (0 < sz) {
+                               int n = (int) Math.min(readBuffer.length, sz);
+                               IO.readFully(cur, curBuffer, 0, n);
+                               IO.readFully(pck, readBuffer, 0, n);
+                               for (int i = 0; i < n; i++) {
+                                       if (curBuffer[i] != readBuffer[i]) {
+                                               throw new IOException(MessageFormat.format(JGitText
+                                                               .get().collisionOn, obj.name()));
                                        }
-                                       sz -= n;
                                }
+                               sz -= n;
+                       }
+               } catch (MissingObjectException notLocal) {
+                       // This is OK, we don't have a copy of the object locally
+                       // but the API throws when we try to read it as usually its
+                       // an error to read something that doesn't exist.
+               } finally {
+                       if (pck != null) {
                                pck.close();
-                       } finally {
-                               cur.close();
                        }
                }
        }
 
+       private void checkObjectCollision(AnyObjectId obj, int type, byte[] data)
+                       throws IOException {
+               try {
+                       final ObjectLoader ldr = readCurs.open(obj, type);
+                       final byte[] existingData = ldr.getCachedBytes(data.length);
+                       if (!Arrays.equals(data, existingData)) {
+                               throw new IOException(MessageFormat.format(
+                                               JGitText.get().collisionOn, obj.name()));
+                       }
+               } catch (MissingObjectException notLocal) {
+                       // This is OK, we don't have a copy of the object locally
+                       // but the API throws when we try to read it as usually its
+                       // an error to read something that doesn't exist.
+               }
+       }
+
        /** @return current position of the input stream being parsed. */
        private long streamPosition() {
                return bBase + bOffset;