]> source.dussan.org Git - jgit.git/commitdiff
PackWriter: Avoid CRC-32 validation when feeding IndexPack 29/2629/1
authorShawn O. Pearce <spearce@spearce.org>
Wed, 2 Mar 2011 20:23:55 +0000 (12:23 -0800)
committerShawn O. Pearce <spearce@spearce.org>
Wed, 2 Mar 2011 20:23:58 +0000 (12:23 -0800)
There is no need to validate the object contents during
copyObjectAsIs if the result is going to be parsed by unpack-objects
or index-pack.  Both programs will compute the SHA-1 of the object,
and also validate most of the pack structure.  For git daemon
like servers, this work is already done on the client end of the
connection, so the server doesn't need to repeat that work itself.

Disable object validation for the 3 transport cases where we know
the remote side will handle object validation for us (push, bundle
creation, and upload pack).  This improves performance on the server
side by reducing the work that must be done.

Change-Id: Iabb78eec45898e4a17f7aab3fb94c004d8d69af6
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ShowPackDelta.java
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java
org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java
org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java
org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java

index 0e0c73831ae17bd45fc5288e8fe125638aec263f..ddeec330f7f71a43c8d5c576e6cc64cd22e411f7 100644 (file)
@@ -107,7 +107,7 @@ class ShowPackDelta extends TextBuiltin {
                asis.selectObjectRepresentation(pw, NullProgressMonitor.INSTANCE,
                                Collections.singleton(target));
                asis.copyObjectAsIs(new PackOutputStream(NullProgressMonitor.INSTANCE,
-                               buf, pw), target);
+                               buf, pw), target, true);
 
                // At this point the object header has no delta information,
                // because it was output as though it were a whole object.
index 3a76d4c66adcf23c8b6a2e6aeec7f6186eb54d25..80820b2adee9ee381bb8f228b0c05ed5530a7639 100644 (file)
@@ -313,21 +313,21 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
        }
 
        final void copyAsIs(PackOutputStream out, LocalObjectToPack src,
-                       WindowCursor curs) throws IOException,
+                       boolean validate, WindowCursor curs) throws IOException,
                        StoredObjectRepresentationNotAvailableException {
                beginCopyAsIs(src);
                try {
-                       copyAsIs2(out, src, curs);
+                       copyAsIs2(out, src, validate, curs);
                } finally {
                        endCopyAsIs();
                }
        }
 
        private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
-                       WindowCursor curs) throws IOException,
+                       boolean validate, WindowCursor curs) throws IOException,
                        StoredObjectRepresentationNotAvailableException {
-               final CRC32 crc1 = new CRC32();
-               final CRC32 crc2 = new CRC32();
+               final CRC32 crc1 = validate ? new CRC32() : null;
+               final CRC32 crc2 = validate ? new CRC32() : null;
                final byte[] buf = out.getCopyBuffer();
 
                // Rip apart the header so we can discover the size.
@@ -348,17 +348,23 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
                        do {
                                c = buf[headerCnt++] & 0xff;
                        } while ((c & 128) != 0);
-                       crc1.update(buf, 0, headerCnt);
-                       crc2.update(buf, 0, headerCnt);
+                       if (validate) {
+                               crc1.update(buf, 0, headerCnt);
+                               crc2.update(buf, 0, headerCnt);
+                       }
                } else if (typeCode == Constants.OBJ_REF_DELTA) {
-                       crc1.update(buf, 0, headerCnt);
-                       crc2.update(buf, 0, headerCnt);
+                       if (validate) {
+                               crc1.update(buf, 0, headerCnt);
+                               crc2.update(buf, 0, headerCnt);
+                       }
 
                        readFully(src.offset + headerCnt, buf, 0, 20, curs);
-                       crc1.update(buf, 0, 20);
-                       crc2.update(buf, 0, 20);
+                       if (validate) {
+                               crc1.update(buf, 0, 20);
+                               crc2.update(buf, 0, 20);
+                       }
                        headerCnt += 20;
-               } else {
+               } else if (validate) {
                        crc1.update(buf, 0, headerCnt);
                        crc2.update(buf, 0, headerCnt);
                }
@@ -374,7 +380,7 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
                try {
                        quickCopy = curs.quickCopy(this, dataOffset, dataLength);
 
-                       if (idx().hasCRC32Support()) {
+                       if (validate && idx().hasCRC32Support()) {
                                // Index has the CRC32 code cached, validate the object.
                                //
                                expectedCRC = idx().findCRC32(src);
@@ -397,7 +403,7 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
                                                        JGitText.get().objectAtHasBadZlibStream,
                                                        src.offset, getPackFile()));
                                }
-                       } else {
+                       } else if (validate) {
                                // We don't have a CRC32 code in the index, so compute it
                                // now while inflating the raw data to get zlib to tell us
                                // whether or not the data is safe.
@@ -427,6 +433,8 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
                                                        src.offset));
                                }
                                expectedCRC = crc1.getValue();
+                       } else {
+                               expectedCRC = -1;
                        }
                } catch (DataFormatException dataFormat) {
                        setCorrupt(src.offset);
@@ -473,12 +481,13 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
                        while (cnt > 0) {
                                final int n = (int) Math.min(cnt, buf.length);
                                readFully(pos, buf, 0, n, curs);
-                               crc2.update(buf, 0, n);
+                               if (validate)
+                                       crc2.update(buf, 0, n);
                                out.write(buf, 0, n);
                                pos += n;
                                cnt -= n;
                        }
-                       if (crc2.getValue() != expectedCRC) {
+                       if (validate && crc2.getValue() != expectedCRC) {
                                throw new CorruptObjectException(MessageFormat.format(JGitText
                                                .get().objectAtHasBadZlibStream, src.offset,
                                                getPackFile()));
index 5a3e01c8c1ec1084e96195a43b6bab5509263c58..661df62cbe5b5ed887864e87ad0272e65a11b2ed 100644 (file)
@@ -143,10 +143,11 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs {
                }
        }
 
-       public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp)
-                       throws IOException, StoredObjectRepresentationNotAvailableException {
+       public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
+                       boolean validate) throws IOException,
+                       StoredObjectRepresentationNotAvailableException {
                LocalObjectToPack src = (LocalObjectToPack) otp;
-               src.pack.copyAsIs(out, src, this);
+               src.pack.copyAsIs(out, src, validate, this);
        }
 
        public void writeObjects(PackOutputStream out, List<ObjectToPack> list)
index 480e0519d52a4f086a437250a5f32614b82a6744..8ad0b24197fcf571c4c6a1777ea745cd11bf90c3 100644 (file)
@@ -130,8 +130,9 @@ public interface ObjectReuseAsIs {
         * reduce data locality for the reader, slowing down data access.
         *
         * Invoking {@link PackOutputStream#writeObject(ObjectToPack)} will cause
-        * {@link #copyObjectAsIs(PackOutputStream, ObjectToPack)} to be invoked
-        * recursively on {@code this} if the current object is scheduled for reuse.
+        * {@link #copyObjectAsIs(PackOutputStream, ObjectToPack, boolean)} to be
+        * invoked recursively on {@code this} if the current object is scheduled
+        * for reuse.
         *
         * @param out
         *            the stream to write each object to.
@@ -160,7 +161,11 @@ public interface ObjectReuseAsIs {
         *
         * <pre>
         * MyToPack mtp = (MyToPack) otp;
-        * byte[] raw = validate(mtp); // throw SORNAE here, if at all
+        * byte[] raw;
+        * if (validate)
+        *       raw = validate(mtp); // throw SORNAE here, if at all
+        * else
+        *       raw = readFast(mtp);
         * out.writeHeader(mtp, mtp.inflatedSize);
         * out.write(raw);
         * </pre>
@@ -169,6 +174,11 @@ public interface ObjectReuseAsIs {
         *            stream the object should be written to.
         * @param otp
         *            the object's saved representation information.
+        * @param validate
+        *            if true the representation must be validated and not be
+        *            corrupt before being reused. If false, validation may be
+        *            skipped as it will be performed elsewhere in the processing
+        *            pipeline.
         * @throws StoredObjectRepresentationNotAvailableException
         *             the previously selected representation is no longer
         *             available. If thrown before {@code out.writeHeader} the pack
@@ -179,8 +189,9 @@ public interface ObjectReuseAsIs {
         *             the stream's write method threw an exception. Packing will
         *             abort.
         */
-       public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp)
-                       throws IOException, StoredObjectRepresentationNotAvailableException;
+       public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
+                       boolean validate) throws IOException,
+                       StoredObjectRepresentationNotAvailableException;
 
        /**
         * Obtain the available cached packs.
index 57dca95a3bab42a75e73e654a2a08cd946397491..3d060486883dcd6e3c84295757a72a9b29f4e7b2 100644 (file)
@@ -177,6 +177,8 @@ public class PackWriter {
 
        private boolean reuseDeltaCommits;
 
+       private boolean reuseValidate;
+
        private boolean thin;
 
        private boolean useCachedPacks;
@@ -245,6 +247,7 @@ public class PackWriter {
 
                deltaBaseAsOffset = config.isDeltaBaseAsOffset();
                reuseDeltas = config.isReuseDeltas();
+               reuseValidate = true; // be paranoid by default
                stats = new Statistics();
        }
 
@@ -298,6 +301,29 @@ public class PackWriter {
                reuseDeltaCommits = reuse;
        }
 
+       /**
+        * Check if the writer validates objects before copying them.
+        *
+        * @return true if validation is enabled; false if the reader will handle
+        *         object validation as a side-effect of it consuming the output.
+        */
+       public boolean isReuseValidatingObjects() {
+               return reuseValidate;
+       }
+
+       /**
+        * Enable (or disable) object validation during packing.
+        *
+        * @param validate
+        *            if true the pack writer will validate an object before it is
+        *            put into the output. This additional validation work may be
+        *            necessary to avoid propagating corruption from one local pack
+        *            file to another local pack file.
+        */
+       public void setReuseValidatingObjects(boolean validate) {
+               reuseValidate = validate;
+       }
+
        /** @return true if this writer is producing a thin pack. */
        public boolean isThin() {
                return thin;
@@ -1006,7 +1032,7 @@ public class PackWriter {
 
                while (otp.isReuseAsIs()) {
                        try {
-                               reuseSupport.copyObjectAsIs(out, otp);
+                               reuseSupport.copyObjectAsIs(out, otp, reuseValidate);
                                out.endObject();
                                otp.setCRC(out.getCRC32());
                                stats.reusedObjects++;
index 2a5cfd0add07619af3aec20e178e777141916885..4e0536d0cd5db0068aeaccd0eb2c32c1792b6cfd 100644 (file)
@@ -260,6 +260,7 @@ public abstract class BasePackPushConnection extends BasePackConnection implemen
 
                        writer.setUseCachedPacks(true);
                        writer.setThin(thinPack);
+                       writer.setReuseValidatingObjects(false);
                        writer.setDeltaBaseAsOffset(capableOfsDelta);
                        writer.preparePack(monitor, newObjects, remoteObjects);
                        writer.writePack(monitor, monitor, out);
index 3865c8a75d17669d62517cdab4b94fc6087c8b10..9e79d43c35250953e17c426d79926f5f382e6e9b 100644 (file)
@@ -202,6 +202,7 @@ public class BundleWriter {
                                exc.add(r.getId());
                        packWriter.setDeltaBaseAsOffset(true);
                        packWriter.setThin(exc.size() > 0);
+                       packWriter.setReuseValidatingObjects(false);
                        if (exc.size() == 0)
                                packWriter.setTagTargets(tagTargets);
                        packWriter.preparePack(monitor, inc, exc);
index 3e324d09317ac0916e679d75706fb3e7f9d2b84c..798c4638f1e3a220411f39076a2664fed2ef7ccf 100644 (file)
@@ -695,6 +695,7 @@ public class UploadPack {
                        pw.setReuseDeltaCommits(true);
                        pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA));
                        pw.setThin(options.contains(OPTION_THIN_PACK));
+                       pw.setReuseValidatingObjects(false);
 
                        if (commonBase.isEmpty()) {
                                Set<ObjectId> tagTargets = new HashSet<ObjectId>();