--- /dev/null
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.storage.pack;
+
+import java.io.IOException;
+
+import org.eclipse.jgit.errors.IncorrectObjectTypeException;
+import org.eclipse.jgit.errors.LargeObjectException;
+import org.eclipse.jgit.errors.MissingObjectException;
+import org.eclipse.jgit.lib.ObjectReader;
+import org.eclipse.jgit.lib.ProgressMonitor;
+import org.eclipse.jgit.util.TemporaryBuffer;
+
+class DeltaWindow {
+ private static final int NEXT_RES = 0;
+
+ private static final int NEXT_SRC = 1;
+
+ private final PackWriter writer;
+
+ private final ObjectReader reader;
+
+ private final DeltaWindowEntry[] window;
+
+ /** Maximum depth we should create for any delta chain. */
+ private final int maxDepth;
+
+ // The object we are currently considering needs a lot of state:
+
+ /** Position of {@link #res} within {@link #window} array. */
+ private int resSlot;
+
+ /**
+ * Maximum delta chain depth the current object can have.
+ * <p>
+ * This can be smaller than {@link #maxDepth}.
+ */
+ private int resMaxDepth;
+
+ /** Window entry of the object we are currently considering. */
+ private DeltaWindowEntry res;
+
+ /** If we have a delta for {@link #res}, this is the shortest found yet. */
+ private TemporaryBuffer.Heap bestDelta;
+
+ /** If we have {@link #bestDelta}, the window position it was created by. */
+ private int bestSlot;
+
+ DeltaWindow(PackWriter pw, ObjectReader or) {
+ writer = pw;
+ reader = or;
+
+ // C Git increases the window size supplied by the user by 1.
+ // We don't know why it does this, but if the user asks for
+ // window=10, it actually processes with window=11. Because
+ // the window size has the largest direct impact on the final
+ // pack file size, we match this odd behavior here to give us
+ // a better chance of producing a similar sized pack as C Git.
+ //
+ // We would prefer to directly honor the user's request since
+ // PackWriter has a minimum of 2 for the window size, but then
+ // users might complain that JGit is creating a bigger pack file.
+ //
+ window = new DeltaWindowEntry[pw.getDeltaSearchWindowSize() + 1];
+ for (int i = 0; i < window.length; i++)
+ window[i] = new DeltaWindowEntry();
+
+ maxDepth = pw.getMaxDeltaDepth();
+ }
+
+ void search(ProgressMonitor monitor, ObjectToPack[] toSearch, int off,
+ int cnt) throws IOException {
+ for (int end = off + cnt; off < end; off++) {
+ monitor.update(1);
+
+ res = window[resSlot];
+ res.set(toSearch[off]);
+
+ if (res.object.isDoNotDelta()) {
+ // PackWriter marked edge objects with the do-not-delta flag.
+ // They are the only ones that appear in toSearch with it set,
+ // but we don't actually want to make a delta for them, just
+ // need to push them into the window so they can be read by
+ // other objects coming through.
+ //
+ keepInWindow();
+ } else {
+ // Search for a delta for the current window slot.
+ //
+ search();
+ }
+ }
+ }
+
+ /**
+ * Look for a delta base for {@link #res} among the other window entries
+ * and, if one is found, record it on the object.
+ * <p>
+ * Whether {@link #res} stays in the window afterwards is decided by
+ * whether {@link #keepInWindow()} is called: when it is not, the same
+ * slot is reused for the next object.
+ *
+ * @throws IOException
+ *             an object's content could not be accessed.
+ */
+ private void search() throws IOException {
+ // TODO(spearce) If the object is used as a base for other
+ // objects in this pack we should limit the depth we create
+ // for ourselves to be the remainder of our longest dependent
+ // chain and the configured maximum depth. This can happen
+ // when the dependents are being reused out of a pack, but we
+ // cannot be because we are near the edge of a thin pack.
+ //
+ resMaxDepth = maxDepth;
+
+ // Loop through the window backwards, considering every entry.
+ // This lets us look at the bigger objects that came before.
+ // An empty slot ends the scan; NEXT_RES abandons this object
+ // and discards any delta found so far.
+ //
+ for (int srcSlot = prior(resSlot); srcSlot != resSlot; srcSlot = prior(srcSlot)) {
+ DeltaWindowEntry src = window[srcSlot];
+ if (src.empty())
+ break;
+ if (delta(src, srcSlot) == NEXT_RES) {
+ bestDelta = null;
+ return;
+ }
+ }
+
+ // We couldn't find a suitable delta for this object, but it may
+ // still be able to act as a base for another one.
+ //
+ if (bestDelta == null) {
+ keepInWindow();
+ return;
+ }
+
+ // Select this best matching delta as the base for the object.
+ //
+ ObjectToPack srcObj = window[bestSlot].object;
+ ObjectToPack resObj = res.object;
+ if (srcObj.isDoNotDelta()) {
+ // The source (the delta base) is an edge object outside of the
+ // pack. It's part of the common base set that the peer already
+ // has on hand, so we don't want to send it. We have to store
+ // an ObjectId and *NOT* an ObjectToPack for the base to ensure
+ // the base isn't included in the outgoing pack file.
+ //
+ resObj.setDeltaBase(srcObj.copy());
+ } else {
+ // The base is part of the pack we are sending, so it should be
+ // a direct pointer to the base.
+ //
+ resObj.setDeltaBase(srcObj);
+ }
+ resObj.setDeltaDepth(srcObj.getDeltaDepth() + 1);
+ resObj.clearReuseAsIs();
+
+ // Discard the cached best result, otherwise it leaks.
+ //
+ bestDelta = null;
+
+ // If this should be the end of a chain, don't keep
+ // it in the window. Just move on to the next object.
+ //
+ if (resObj.getDeltaDepth() == maxDepth)
+ return;
+
+ shuffleBaseUpInPriority();
+ keepInWindow();
+ }
+
+ /**
+ * Try to encode {@link #res} as a delta against one window entry,
+ * remembering the result in {@link #bestDelta} and {@link #bestSlot}
+ * when it beats the best delta found so far.
+ *
+ * @param src
+ *            candidate delta base.
+ * @param srcSlot
+ *            position of {@code src} within {@link #window}.
+ * @return {@link #NEXT_SRC} if the caller should go on to the next
+ *         candidate base; {@link #NEXT_RES} if the caller should stop
+ *         searching for the current object.
+ * @throws IOException
+ *             a non-edge source object could not be read.
+ */
+ private int delta(final DeltaWindowEntry src, final int srcSlot)
+ throws IOException {
+ // Objects must use only the same type as their delta base.
+ // If we are looking at something where that isn't true we
+ // have exhausted everything of the correct type and should
+ // move on to the next thing to examine.
+ //
+ if (src.type() != res.type()) {
+ keepInWindow();
+ return NEXT_RES;
+ }
+
+ // Only consider a source with a short enough delta chain.
+ if (src.depth() > resMaxDepth)
+ return NEXT_SRC;
+
+ // Estimate a reasonable upper limit on delta size.
+ int msz = deltaSizeLimit(res, resMaxDepth, src);
+ if (msz <= 8)
+ return NEXT_SRC;
+
+ // If we have to insert a lot to make this work, find another.
+ if (res.size() - src.size() > msz)
+ return NEXT_SRC;
+
+ // If the sizes are radically different, this is a bad pairing.
+ if (res.size() < src.size() / 16)
+ return NEXT_SRC;
+
+ DeltaIndex srcIndex;
+ try {
+ srcIndex = index(src);
+ } catch (LargeObjectException tooBig) {
+ // If the source is too big to work on, skip it.
+ dropFromWindow(srcSlot);
+ return NEXT_SRC;
+ } catch (IOException notAvailable) {
+ if (src.object.isDoNotDelta()) {
+ // This is an edge that is suddenly not available.
+ dropFromWindow(srcSlot);
+ return NEXT_SRC;
+ } else {
+ throw notAvailable;
+ }
+ }
+
+ byte[] resBuf;
+ try {
+ resBuf = buffer(res);
+ } catch (LargeObjectException tooBig) {
+ // If it's too big, move on to another item. keepInWindow() is
+ // not called here, so the slot is reused by the next object.
+ return NEXT_RES;
+ }
+
+ // If we already have a delta for the current object, abort
+ // encoding early if this new pairing produces a larger delta.
+ if (bestDelta != null && bestDelta.length() < msz)
+ msz = (int) bestDelta.length();
+
+ TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(msz);
+ try {
+ if (!srcIndex.encode(delta, resBuf, msz))
+ return NEXT_SRC;
+ } catch (IOException deltaTooBig) {
+ // This only happens when the heap overflows our limit.
+ return NEXT_SRC;
+ }
+
+ if (isBetterDelta(src, delta)) {
+ bestDelta = delta;
+ bestSlot = srcSlot;
+ }
+
+ return NEXT_SRC;
+ }
+
+ /**
+ * Rotate the window entries between {@link #bestSlot} and
+ * {@link #resSlot} so the base just selected ends up at
+ * {@link #resSlot} and {@link #res} ends up in the slot before it.
+ */
+ private void shuffleBaseUpInPriority() {
+ // Shuffle the entire window so that the best match we just used
+ // is at our current index, and our current object is at the index
+ // before it. Slide any entries in between to make space.
+ //
+ window[resSlot] = window[bestSlot];
+
+ // Walk backwards from the slot before resSlot, pushing each entry
+ // one position further back until the base's old slot is reached.
+ DeltaWindowEntry next = res;
+ int slot = prior(resSlot);
+ for (; slot != bestSlot; slot = prior(slot)) {
+ DeltaWindowEntry e = window[slot];
+ window[slot] = next;
+ next = e;
+ }
+ // The last displaced entry fills the slot the base vacated.
+ window[slot] = next;
+ }
+
+ private void keepInWindow() {
+ if (++resSlot == window.length)
+ resSlot = 0;
+ }
+
+ /**
+  * @param slot
+  *            a position within {@link #window}.
+  * @return the position immediately before {@code slot}, wrapping from
+  *         the first slot around to the last.
+  */
+ private int prior(int slot) {
+     return (slot != 0) ? slot - 1 : window.length - 1;
+ }
+
+ /**
+ * Placeholder for evicting an unusable source entry from the window.
+ * <p>
+ * The body is currently empty, so the entry remains in the window.
+ *
+ * @param srcSlot
+ *            position of the entry that should be dropped; unused.
+ */
+ private void dropFromWindow(@SuppressWarnings("unused") int srcSlot) {
+ // We should drop the current source entry from the window,
+ // it is somehow invalid for us to work with.
+ }
+
+ private boolean isBetterDelta(DeltaWindowEntry src,
+ TemporaryBuffer.Heap resDelta) {
+ if (bestDelta == null)
+ return true;
+
+ // If both delta sequences are the same length, use the one
+ // that has a shorter delta chain since it would be faster
+ // to access during reads.
+ //
+ if (resDelta.length() == bestDelta.length())
+ return src.depth() < window[bestSlot].depth();
+
+ return resDelta.length() < bestDelta.length();
+ }
+
+ /**
+  * Estimate the largest delta worth keeping for {@code res} when it is
+  * encoded against {@code src}.
+  *
+  * @param res
+  *            the object being delta compressed.
+  * @param maxDepth
+  *            maximum delta chain depth allowed for {@code res}.
+  * @param src
+  *            the candidate delta base.
+  * @return upper bound on the delta size in bytes; 0 when
+  *         {@code maxDepth} is not positive. The result may be zero or
+  *         negative for small objects, which callers treat as "no
+  *         usable limit".
+  */
+ private static int deltaSizeLimit(DeltaWindowEntry res, int maxDepth,
+         DeltaWindowEntry src) {
+     // A non-positive depth limit leaves no room for any delta chain,
+     // and would otherwise cause a division by zero below.
+     //
+     if (maxDepth <= 0)
+         return 0;
+
+     // Ideally the delta is at most 50% of the original size,
+     // but we also want to account for delta header overhead in
+     // the pack file (to point to the delta base) so subtract off
+     // some of those header bytes from the limit.
+     //
+     final long limit = res.size() / 2 - 20;
+
+     // Distribute the delta limit over the entire chain length.
+     // This is weighted such that deeper items in the chain must
+     // be even smaller than if they were earlier in the chain, as
+     // they cost significantly more to unpack due to the increased
+     // number of recursive unpack calls.
+     //
+     // The product is computed in 64 bits: with sizes approaching
+     // Integer.MAX_VALUE / 2, an int multiply by the depth overflows.
+     //
+     final long remainingDepth = maxDepth - src.depth();
+     return (int) ((limit * remainingDepth) / maxDepth);
+ }
+
+ /**
+  * Obtain the delta index for a window entry, building and caching it
+  * on first use.
+  *
+  * @param ent
+  *            entry whose content should be indexed.
+  * @return the entry's index.
+  * @throws LargeObjectException
+  *             the JVM ran out of memory while building the index.
+  * @throws IOException
+  *             the entry's content could not be loaded.
+  */
+ private DeltaIndex index(DeltaWindowEntry ent)
+         throws MissingObjectException, IncorrectObjectTypeException,
+         IOException, LargeObjectException {
+     if (ent.index != null)
+         return ent.index;
+
+     try {
+         ent.index = new DeltaIndex(buffer(ent));
+     } catch (OutOfMemoryError noMemory) {
+         LargeObjectException tooBig = new LargeObjectException(ent.object);
+         tooBig.initCause(noMemory);
+         throw tooBig;
+     }
+     return ent.index;
+ }
+
+ /**
+  * Obtain the full content of a window entry, loading it through the
+  * writer and caching it on first use.
+  *
+  * @param ent
+  *            entry whose content is needed.
+  * @return the entry's content.
+  * @throws IOException
+  *             the content could not be loaded.
+  */
+ private byte[] buffer(DeltaWindowEntry ent) throws MissingObjectException,
+         IncorrectObjectTypeException, IOException, LargeObjectException {
+     if (ent.buffer == null)
+         ent.buffer = writer.buffer(reader, ent.object);
+     return ent.buffer;
+ }
+}
\ No newline at end of file
--- /dev/null
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.storage.pack;
+
+class DeltaWindowEntry {
+ /** Object held by this entry; null while the entry is unused. */
+ ObjectToPack object;
+
+ /** Full content of {@link #object}; populated on demand. */
+ byte[] buffer;
+
+ /** Index over the content, for encoding deltas; populated on demand. */
+ DeltaIndex index;
+
+ /**
+  * Point this entry at a new object and discard any cached state.
+  *
+  * @param object
+  *            the object now held by this entry.
+  */
+ void set(ObjectToPack object) {
+     this.buffer = null;
+     this.index = null;
+     this.object = object;
+ }
+
+ /** @return delta chain depth currently recorded on the object. */
+ int depth() {
+     return this.object.getDeltaDepth();
+ }
+
+ /** @return type code of the object held by this entry. */
+ int type() {
+     return this.object.getType();
+ }
+
+ /** @return weight recorded on the object, used as its size estimate. */
+ int size() {
+     return this.object.getWeight();
+ }
+
+ /** @return true if no object has been stored in this entry. */
+ boolean empty() {
+     return this.object == null;
+ }
+}
import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_WHOLE;
import java.io.IOException;
+import java.io.InputStream;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.zip.Deflater;
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
+import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.storage.file.PackIndexWriter;
+import org.eclipse.jgit.util.IO;
+import org.eclipse.jgit.util.TemporaryBuffer;
/**
* <p>
if ((reuseDeltas || reuseObjects) && reuseSupport != null)
searchForReuse();
+ if (deltaCompress)
+ searchForDeltas(compressMonitor);
final PackOutputStream out = new PackOutputStream(writeMonitor,
packStream, this);
}
}
+ /**
+  * Collect the tree and blob objects eligible for delta compression,
+  * add the usable edge objects as potential bases, sort them, and run
+  * the delta search over the result.
+  *
+  * @param monitor
+  *            progress sink for the compression phase.
+  * @throws IOException
+  *             an object's size could not be read.
+  */
+ private void searchForDeltas(ProgressMonitor monitor)
+         throws MissingObjectException, IncorrectObjectTypeException,
+         IOException {
+     // Commits and annotated tags tend to have too many differences to
+     // really benefit from delta compression, so only trees and blobs
+     // are examined here.
+     //
+     final int capacity = objectsLists[Constants.OBJ_TREE].size()
+             + objectsLists[Constants.OBJ_BLOB].size()
+             + edgeObjects.size();
+     final ObjectToPack[] candidates = new ObjectToPack[capacity];
+     int used = findObjectsNeedingDelta(candidates, 0, Constants.OBJ_TREE);
+     used = findObjectsNeedingDelta(candidates, used, Constants.OBJ_BLOB);
+     if (used == 0)
+         return;
+
+     // Queue up any edge objects that we might delta against. They are
+     // not sent, as the other side is assumed to have them, but they
+     // are needed in the search phase below.
+     //
+     for (ObjectToPack edge : edgeObjects) {
+         try {
+             if (loadSize(edge))
+                 candidates[used++] = edge;
+         } catch (IOException notAvailable) {
+             // Skip this object. Since it is not going to be written
+             // out, the only consequence of it being unavailable is a
+             // possibly larger data stream than could have been made.
+             //
+             if (!ignoreMissingUninteresting)
+                 throw notAvailable;
+         }
+     }
+
+     monitor.beginTask(COMPRESSING_OBJECTS_PROGRESS, used);
+
+     // Order by type, then by path hash so like files are near each
+     // other, and then by size descending so that bigger files are
+     // first. This applies "Linus' Law" which states that newer files
+     // tend to be the bigger ones, because source files grow and hardly
+     // ever shrink.
+     //
+     Arrays.sort(candidates, 0, used, new Comparator<ObjectToPack>() {
+         public int compare(ObjectToPack a, ObjectToPack b) {
+             final int byType = a.getType() - b.getType();
+             if (byType != 0)
+                 return byType;
+
+             final int byHash = (a.getPathHash() >>> 1)
+                     - (b.getPathHash() >>> 1);
+             if (byHash != 0)
+                 return byHash;
+
+             final int byLowBit = (a.getPathHash() & 1)
+                     - (b.getPathHash() & 1);
+             if (byLowBit != 0)
+                 return byLowBit;
+
+             return b.getWeight() - a.getWeight();
+         }
+     });
+     searchForDeltas(monitor, candidates, used);
+     monitor.endTask();
+ }
+
+ /**
+  * Append to {@code list} every object of the given type that still
+  * needs a delta search.
+  *
+  * @param list
+  *            destination array.
+  * @param cnt
+  *            number of slots of {@code list} already in use.
+  * @param type
+  *            object type whose list should be scanned.
+  * @return number of slots of {@code list} in use after the scan.
+  * @throws IOException
+  *             an object's size could not be read.
+  */
+ private int findObjectsNeedingDelta(ObjectToPack[] list, int cnt, int type)
+         throws MissingObjectException, IncorrectObjectTypeException,
+         IOException {
+     int next = cnt;
+     for (ObjectToPack otp : objectsLists[type]) {
+         // Skip objects with delta disabled for their path and objects
+         // that are already reusing a delta; take the rest only when
+         // their size is suitable.
+         //
+         if (!otp.isDoNotDelta() && !otp.isDeltaRepresentation()
+                 && loadSize(otp))
+             list[next++] = otp;
+     }
+     return next;
+ }
+
+ private boolean loadSize(ObjectToPack e) throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ long sz = reader.getObjectSize(e, e.getType());
+
+ // If its too big for us to handle, skip over it.
+ //
+ if (bigFileThreshold <= sz || Integer.MAX_VALUE <= sz)
+ return false;
+
+ // If its too tiny for the delta compression to work, skip it.
+ //
+ if (sz <= DeltaIndex.BLKSZ)
+ return false;
+
+ e.setWeight((int) sz);
+ return true;
+ }
+
+ /**
+  * Run a single delta window over the first {@code cnt} entries of
+  * {@code list}.
+  *
+  * @param monitor
+  *            progress sink handed to the window.
+  * @param list
+  *            sorted candidate objects.
+  * @param cnt
+  *            number of entries of {@code list} to search.
+  * @throws IOException
+  *             an object's content could not be accessed.
+  */
+ private void searchForDeltas(ProgressMonitor monitor,
+         ObjectToPack[] list, int cnt) throws MissingObjectException,
+         IncorrectObjectTypeException, LargeObjectException, IOException {
+     new DeltaWindow(this, reader).search(monitor, list, 0, cnt);
+ }
+
private void writeObjects(ProgressMonitor writeMonitor, PackOutputStream out)
throws IOException {
for (List<ObjectToPack> list : objectsLists) {
// If we reached here, reuse wasn't possible.
//
- writeWholeObjectDeflate(out, otp);
+ if (otp.isDeltaRepresentation())
+ writeDeltaObjectDeflate(out, otp);
+ else
+ writeWholeObjectDeflate(out, otp);
out.endObject();
otp.setCRC(out.getCRC32());
}
dst.finish();
}
+ private void writeDeltaObjectDeflate(PackOutputStream out,
+ final ObjectToPack otp) throws IOException {
+ TemporaryBuffer.Heap delta = delta(otp);
+ out.writeHeader(otp, delta.length());
+
+ Deflater deflater = deflater();
+ deflater.reset();
+ DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater);
+ delta.writeTo(dst, null);
+ dst.finish();
+ }
+
+ private TemporaryBuffer.Heap delta(final ObjectToPack otp)
+ throws IOException {
+ DeltaIndex index = new DeltaIndex(buffer(reader, otp.getDeltaBaseId()));
+ byte[] res = buffer(reader, otp);
+
+ // We never would have proposed this pair if the delta would be
+ // larger than the unpacked version of the object. So using it
+ // as our buffer limit is valid: we will never reach it.
+ //
+ TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(res.length);
+ index.encode(delta, res);
+ return delta;
+ }
+
+ byte[] buffer(ObjectReader or, AnyObjectId objId) throws IOException {
+ ObjectLoader ldr = or.open(objId);
+ if (!ldr.isLarge())
+ return ldr.getCachedBytes();
+
+ // PackWriter should have already pruned objects that
+ // are above the big file threshold, so our chances of
+ // the object being below it are very good. We really
+ // shouldn't be here, unless the implementation is odd.
+
+ // If it really is too big to work with, abort out now.
+ //
+ long sz = ldr.getSize();
+ if (getBigFileThreshold() <= sz || Integer.MAX_VALUE < sz)
+ throw new LargeObjectException(objId.copy());
+
+ // Its considered to be large by the loader, but we really
+ // want it in byte array format. Try to make it happen.
+ //
+ byte[] buf;
+ try {
+ buf = new byte[(int) sz];
+ } catch (OutOfMemoryError noMemory) {
+ LargeObjectException e;
+
+ e = new LargeObjectException(objId.copy());
+ e.initCause(noMemory);
+ throw e;
+ }
+ InputStream in = ldr.openStream();
+ try {
+ IO.readFully(in, buf, 0, buf.length);
+ } finally {
+ in.close();
+ }
+ return buf;
+ }
+
private Deflater deflater() {
if (myDeflater == null)
myDeflater = new Deflater(compressionLevel);