Browse Source

PackWriter: Avoid CRC-32 validation when feeding IndexPack

There is no need to validate the object contents during
copyObjectAsIs if the result is going to be parsed by unpack-objects
or index-pack.  Both programs will compute the SHA-1 of the object,
and also validate most of the pack structure.  For git daemon
like servers, this work is already done on the client end of the
connection, so the server doesn't need to repeat that work itself.

Disable object validation for the 3 transport cases where we know
the remote side will handle object validation for us (push, bundle
creation, and upload pack).  This improves performance on the server
side by reducing the work that must be done.

Change-Id: Iabb78eec45898e4a17f7aab3fb94c004d8d69af6
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
tags/v0.12.1
Shawn O. Pearce 13 years ago
parent
commit
1b2062fe37

+ 1
- 1
org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ShowPackDelta.java View File

@@ -107,7 +107,7 @@ class ShowPackDelta extends TextBuiltin {
asis.selectObjectRepresentation(pw, NullProgressMonitor.INSTANCE,
Collections.singleton(target));
asis.copyObjectAsIs(new PackOutputStream(NullProgressMonitor.INSTANCE,
buf, pw), target);
buf, pw), target, true);

// At this point the object header has no delta information,
// because it was output as though it were a whole object.

+ 25
- 16
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java View File

@@ -313,21 +313,21 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
}

final void copyAsIs(PackOutputStream out, LocalObjectToPack src,
WindowCursor curs) throws IOException,
boolean validate, WindowCursor curs) throws IOException,
StoredObjectRepresentationNotAvailableException {
beginCopyAsIs(src);
try {
copyAsIs2(out, src, curs);
copyAsIs2(out, src, validate, curs);
} finally {
endCopyAsIs();
}
}

private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
WindowCursor curs) throws IOException,
boolean validate, WindowCursor curs) throws IOException,
StoredObjectRepresentationNotAvailableException {
final CRC32 crc1 = new CRC32();
final CRC32 crc2 = new CRC32();
final CRC32 crc1 = validate ? new CRC32() : null;
final CRC32 crc2 = validate ? new CRC32() : null;
final byte[] buf = out.getCopyBuffer();

// Rip apart the header so we can discover the size.
@@ -348,17 +348,23 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
do {
c = buf[headerCnt++] & 0xff;
} while ((c & 128) != 0);
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
if (validate) {
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}
} else if (typeCode == Constants.OBJ_REF_DELTA) {
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
if (validate) {
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}

readFully(src.offset + headerCnt, buf, 0, 20, curs);
crc1.update(buf, 0, 20);
crc2.update(buf, 0, 20);
if (validate) {
crc1.update(buf, 0, 20);
crc2.update(buf, 0, 20);
}
headerCnt += 20;
} else {
} else if (validate) {
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}
@@ -374,7 +380,7 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
try {
quickCopy = curs.quickCopy(this, dataOffset, dataLength);

if (idx().hasCRC32Support()) {
if (validate && idx().hasCRC32Support()) {
// Index has the CRC32 code cached, validate the object.
//
expectedCRC = idx().findCRC32(src);
@@ -397,7 +403,7 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
JGitText.get().objectAtHasBadZlibStream,
src.offset, getPackFile()));
}
} else {
} else if (validate) {
// We don't have a CRC32 code in the index, so compute it
// now while inflating the raw data to get zlib to tell us
// whether or not the data is safe.
@@ -427,6 +433,8 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
src.offset));
}
expectedCRC = crc1.getValue();
} else {
expectedCRC = -1;
}
} catch (DataFormatException dataFormat) {
setCorrupt(src.offset);
@@ -473,12 +481,13 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
while (cnt > 0) {
final int n = (int) Math.min(cnt, buf.length);
readFully(pos, buf, 0, n, curs);
crc2.update(buf, 0, n);
if (validate)
crc2.update(buf, 0, n);
out.write(buf, 0, n);
pos += n;
cnt -= n;
}
if (crc2.getValue() != expectedCRC) {
if (validate && crc2.getValue() != expectedCRC) {
throw new CorruptObjectException(MessageFormat.format(JGitText
.get().objectAtHasBadZlibStream, src.offset,
getPackFile()));

+ 4
- 3
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java View File

@@ -143,10 +143,11 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs {
}
}

public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp)
throws IOException, StoredObjectRepresentationNotAvailableException {
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
boolean validate) throws IOException,
StoredObjectRepresentationNotAvailableException {
LocalObjectToPack src = (LocalObjectToPack) otp;
src.pack.copyAsIs(out, src, this);
src.pack.copyAsIs(out, src, validate, this);
}

public void writeObjects(PackOutputStream out, List<ObjectToPack> list)

+ 16
- 5
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java View File

@@ -130,8 +130,9 @@ public interface ObjectReuseAsIs {
* reduce data locality for the reader, slowing down data access.
*
* Invoking {@link PackOutputStream#writeObject(ObjectToPack)} will cause
* {@link #copyObjectAsIs(PackOutputStream, ObjectToPack)} to be invoked
* recursively on {@code this} if the current object is scheduled for reuse.
* {@link #copyObjectAsIs(PackOutputStream, ObjectToPack, boolean)} to be
* invoked recursively on {@code this} if the current object is scheduled
* for reuse.
*
* @param out
* the stream to write each object to.
@@ -160,7 +161,11 @@ public interface ObjectReuseAsIs {
*
* <pre>
* MyToPack mtp = (MyToPack) otp;
* byte[] raw = validate(mtp); // throw SORNAE here, if at all
* byte[] raw;
* if (validate)
* raw = validate(mtp); // throw SORNAE here, if at all
* else
* raw = readFast(mtp);
* out.writeHeader(mtp, mtp.inflatedSize);
* out.write(raw);
* </pre>
@@ -169,6 +174,11 @@ public interface ObjectReuseAsIs {
* stream the object should be written to.
* @param otp
* the object's saved representation information.
* @param validate
* if true the representation must be validated and not be
* corrupt before being reused. If false, validation may be
* skipped as it will be performed elsewhere in the processing
* pipeline.
* @throws StoredObjectRepresentationNotAvailableException
* the previously selected representation is no longer
* available. If thrown before {@code out.writeHeader} the pack
@@ -179,8 +189,9 @@ public interface ObjectReuseAsIs {
* the stream's write method threw an exception. Packing will
* abort.
*/
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp)
throws IOException, StoredObjectRepresentationNotAvailableException;
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
boolean validate) throws IOException,
StoredObjectRepresentationNotAvailableException;

/**
* Obtain the available cached packs.

+ 27
- 1
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java View File

@@ -177,6 +177,8 @@ public class PackWriter {

private boolean reuseDeltaCommits;

private boolean reuseValidate;

private boolean thin;

private boolean useCachedPacks;
@@ -245,6 +247,7 @@ public class PackWriter {

deltaBaseAsOffset = config.isDeltaBaseAsOffset();
reuseDeltas = config.isReuseDeltas();
reuseValidate = true; // be paranoid by default
stats = new Statistics();
}

@@ -298,6 +301,29 @@ public class PackWriter {
reuseDeltaCommits = reuse;
}

/**
* Check if the writer validates objects before copying them.
*
* @return true if validation is enabled; false if the reader will handle
* object validation as a side-effect of it consuming the output.
*/
public boolean isReuseValidatingObjects() {
return reuseValidate;
}

/**
* Enable (or disable) object validation during packing.
*
* @param validate
* if true the pack writer will validate an object before it is
* put into the output. This additional validation work may be
* necessary to avoid propagating corruption from one local pack
* file to another local pack file.
*/
public void setReuseValidatingObjects(boolean validate) {
reuseValidate = validate;
}

/** @return true if this writer is producing a thin pack. */
public boolean isThin() {
return thin;
@@ -1006,7 +1032,7 @@ public class PackWriter {

while (otp.isReuseAsIs()) {
try {
reuseSupport.copyObjectAsIs(out, otp);
reuseSupport.copyObjectAsIs(out, otp, reuseValidate);
out.endObject();
otp.setCRC(out.getCRC32());
stats.reusedObjects++;

+ 1
- 0
org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java View File

@@ -260,6 +260,7 @@ public abstract class BasePackPushConnection extends BasePackConnection implemen

writer.setUseCachedPacks(true);
writer.setThin(thinPack);
writer.setReuseValidatingObjects(false);
writer.setDeltaBaseAsOffset(capableOfsDelta);
writer.preparePack(monitor, newObjects, remoteObjects);
writer.writePack(monitor, monitor, out);

+ 1
- 0
org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java View File

@@ -202,6 +202,7 @@ public class BundleWriter {
exc.add(r.getId());
packWriter.setDeltaBaseAsOffset(true);
packWriter.setThin(exc.size() > 0);
packWriter.setReuseValidatingObjects(false);
if (exc.size() == 0)
packWriter.setTagTargets(tagTargets);
packWriter.preparePack(monitor, inc, exc);

+ 1
- 0
org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java View File

@@ -695,6 +695,7 @@ public class UploadPack {
pw.setReuseDeltaCommits(true);
pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA));
pw.setThin(options.contains(OPTION_THIN_PACK));
pw.setReuseValidatingObjects(false);

if (commonBase.isEmpty()) {
Set<ObjectId> tagTargets = new HashSet<ObjectId>();

Loading…
Cancel
Save