source.dussan.org Git - jgit.git/commitdiff
Avoid storing large packs in block cache during reuse 87/46387/2
author: Shawn Pearce <spearce@spearce.org>
Thu, 23 Apr 2015 19:12:43 +0000 (12:12 -0700)
committer: Shawn Pearce <spearce@spearce.org>
Thu, 23 Apr 2015 22:21:59 +0000 (15:21 -0700)
When a large pack (> 30% of the block cache) is being reused by
copying, it pollutes the block cache with noise by storing blocks
that are never referenced again.

Avoid this by streaming the file directly from its channel onto
the output stream.

Change-Id: I2e53de27f3dcfb93de68b1fad45f75ab23e79fe7

org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlockCache.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlockCacheConfig.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReaderOptions.java
org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java

index 748a4a38e0e562d769d8357a7a29ba6900fd95e7..f3f75c3fb2abbfa8d64959d99b3db547dcc2289e 100644 (file)
@@ -135,6 +135,9 @@ public final class DfsBlockCache {
        /** Maximum number of bytes the cache should hold. */
        private final long maxBytes;
 
+       /** Pack files smaller than this size can be copied through the cache. */
+       private final long maxStreamThroughCache;
+
        /**
         * Suggested block size to read from pack files in.
         * <p>
@@ -191,6 +194,7 @@ public final class DfsBlockCache {
                        eb = tableSize;
 
                maxBytes = cfg.getBlockLimit();
+               maxStreamThroughCache = (long) (maxBytes * cfg.getStreamRatio());
                blockSize = cfg.getBlockSize();
                blockSizeShift = Integer.numberOfTrailingZeros(blockSize);
 
@@ -206,6 +210,10 @@ public final class DfsBlockCache {
                statMiss = new AtomicLong();
        }
 
+       boolean copyThroughCache(long length) {
+               return length <= maxStreamThroughCache;
+       }
+
        /** @return total number of bytes in the cache. */
        public long getCurrentSize() {
                return liveBytes;
index ca1451a2bfbf9d35790120cd09ea3b9bb0af3339..a7d13defdcba59d4e20ed09aa1a4754d09e3d16f 100644 (file)
@@ -47,7 +47,11 @@ import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_CORE_SECTION;
 import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_DFS_SECTION;
 import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BLOCK_LIMIT;
 import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BLOCK_SIZE;
+import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_STREAM_RATIO;
 
+import java.text.MessageFormat;
+
+import org.eclipse.jgit.internal.JGitText;
 import org.eclipse.jgit.lib.Config;
 
 /** Configuration parameters for {@link DfsBlockCache}. */
@@ -59,13 +63,14 @@ public class DfsBlockCacheConfig {
        public static final int MB = 1024 * KB;
 
        private long blockLimit;
-
        private int blockSize;
+       private double streamRatio;
 
        /** Create a default configuration. */
        public DfsBlockCacheConfig() {
                setBlockLimit(32 * MB);
                setBlockSize(64 * KB);
+               setStreamRatio(0.30);
        }
 
        /**
@@ -105,6 +110,27 @@ public class DfsBlockCacheConfig {
                return this;
        }
 
+       /**
+        * @return highest percentage of {@link #getBlockLimit()} a single pack can
+        *         occupy while being copied by the pack reuse strategy. <b>Default
+        *         is 0.30, or 30%</b>.
+        * @since 4.0
+        */
+       public double getStreamRatio() {
+               return streamRatio;
+       }
+
+       /**
+        * @param ratio
+        *            percentage of cache to occupy with a copied pack.
+        * @return {@code this}
+        * @since 4.0
+        */
+       public DfsBlockCacheConfig setStreamRatio(double ratio) {
+               streamRatio = Math.max(0, Math.min(ratio, 1.0));
+               return this;
+       }
+
        /**
         * Update properties by setting fields from the configuration.
         * <p>
@@ -127,6 +153,22 @@ public class DfsBlockCacheConfig {
                                CONFIG_DFS_SECTION,
                                CONFIG_KEY_BLOCK_SIZE,
                                getBlockSize()));
+
+               String v = rc.getString(
+                               CONFIG_CORE_SECTION,
+                               CONFIG_DFS_SECTION,
+                               CONFIG_KEY_STREAM_RATIO);
+               if (v != null) {
+                       try {
+                               setStreamRatio(Double.parseDouble(v));
+                       } catch (NumberFormatException e) {
+                               throw new IllegalArgumentException(MessageFormat.format(
+                                               JGitText.get().enumValueNotSupported3,
+                                               CONFIG_CORE_SECTION,
+                                               CONFIG_DFS_SECTION,
+                                               CONFIG_KEY_STREAM_RATIO, v));
+                       }
+               }
                return this;
        }
 }
index 58df895b4e35f4d0da0213349fb31f218a4c01fd..c384878b8157a87e8e78ef5ade037737f39ea188 100644 (file)
@@ -54,8 +54,11 @@ import java.io.BufferedInputStream;
 import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.ByteBuffer;
 import java.nio.channels.Channels;
+import java.security.MessageDigest;
 import java.text.MessageFormat;
+import java.util.Arrays;
 import java.util.Set;
 import java.util.zip.CRC32;
 import java.util.zip.DataFormatException;
@@ -80,7 +83,6 @@ import org.eclipse.jgit.lib.Constants;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectLoader;
 import org.eclipse.jgit.lib.Repository;
-import org.eclipse.jgit.util.IO;
 import org.eclipse.jgit.util.LongList;
 
 /**
@@ -466,9 +468,122 @@ public final class DfsPackFile {
 
        void copyPackAsIs(PackOutputStream out, boolean validate, DfsReader ctx)
                        throws IOException {
-               // Pin the first window, this ensures the length is accurate.
-               ctx.pin(this, 0);
-               ctx.copyPackAsIs(this, length, validate, out);
+               MessageDigest md = initCopyPack(out, validate, ctx);
+               long p;
+               if (cache.copyThroughCache(length))
+                       p = copyPackThroughCache(out, ctx, md);
+               else
+                       p = copyPackBypassCache(out, ctx, md);
+               verifyPackChecksum(p, md, ctx);
+       }
+
+       private MessageDigest initCopyPack(PackOutputStream out, boolean validate,
+                       DfsReader ctx) throws IOException {
+               // If the length hasn't been determined yet, pin to set it.
+               if (length == -1)
+                       ctx.pin(this, 0);
+               if (!validate) {
+                       ctx.unpin();
+                       return null;
+               }
+
+               int hdrlen = 12;
+               byte[] buf = out.getCopyBuffer();
+               int n = ctx.copy(this, 0, buf, 0, hdrlen);
+               ctx.unpin();
+               if (n != hdrlen) {
+                       setInvalid();
+                       throw new IOException(MessageFormat.format(
+                                       JGitText.get().packfileIsTruncated, getPackName()));
+               }
+
+               MessageDigest md = Constants.newMessageDigest();
+               md.update(buf, 0, hdrlen);
+               return md;
+       }
+
+       private long copyPackThroughCache(PackOutputStream out, DfsReader ctx,
+                       MessageDigest md) throws IOException {
+               long position = 12;
+               long remaining = length - (12 + 20);
+               while (0 < remaining) {
+                       DfsBlock b = cache.getOrLoad(this, position, ctx);
+                       int ptr = (int) (position - b.start);
+                       int n = (int) Math.min(b.size() - ptr, remaining);
+                       b.write(out, position, n, md);
+                       position += n;
+                       remaining -= n;
+               }
+               return position;
+       }
+
+       private long copyPackBypassCache(PackOutputStream out, DfsReader ctx,
+                       MessageDigest md) throws IOException {
+               try (ReadableChannel rc = ctx.db.openFile(packDesc, PACK)) {
+                       ByteBuffer buf = newCopyBuffer(out, rc, ctx);
+                       long position = 12;
+                       long remaining = length - (12 + 20);
+                       while (0 < remaining) {
+                               DfsBlock b = cache.get(key, alignToBlock(position));
+                               if (b != null) {
+                                       int ptr = (int) (position - b.start);
+                                       int n = (int) Math.min(b.size() - ptr, remaining);
+                                       b.write(out, position, n, md);
+                                       position += n;
+                                       remaining -= n;
+                                       rc.position(position);
+                                       continue;
+                               }
+
+                               buf.position(0);
+                               int n = read(rc, buf);
+                               if (n <= 0) {
+                                       setInvalid();
+                                       throw new IOException(MessageFormat.format(
+                                                       JGitText.get().packfileIsTruncated, getPackName()));
+                               }
+                               if (n > remaining)
+                                       n = (int) remaining;
+                               out.write(buf.array(), 0, n);
+                               if (md != null)
+                                       md.update(buf.array(), 0, n);
+                               position += n;
+                               remaining -= n;
+                       }
+                       return position;
+               }
+       }
+
+       private ByteBuffer newCopyBuffer(PackOutputStream out, ReadableChannel rc,
+                       DfsReader ctx) {
+               int bs = blockSize(rc);
+               int bufferSize = ctx.getOptions().getStreamPackBufferSize();
+               if (bufferSize > bs)
+                       bs = (bufferSize / bs) * bs;
+
+               byte[] copyBuf = out.getCopyBuffer();
+               if (bs > copyBuf.length)
+                       copyBuf = new byte[bs];
+               return ByteBuffer.wrap(copyBuf, 0, bs);
+       }
+
+       private void verifyPackChecksum(long position, MessageDigest md,
+                       DfsReader ctx) throws IOException {
+               if (md != null) {
+                       byte[] buf = new byte[20];
+                       byte[] actHash = md.digest();
+                       if (ctx.copy(this, position, buf, 0, 20) != 20) {
+                               setInvalid();
+                               throw new IOException(MessageFormat.format(
+                                               JGitText.get().packfileIsTruncated, getPackName()));
+                       }
+                       if (!Arrays.equals(actHash, buf)) {
+                               setInvalid();
+                               throw new IOException(MessageFormat.format(
+                                               JGitText.get().packfileCorruptionDetected,
+                                               getPackName()));
+                       }
+               }
        }
 
        void copyAsIs(PackOutputStream out, DfsObjectToPack src,
@@ -692,18 +807,8 @@ public final class DfsPackFile {
 
                ReadableChannel rc = ctx.db.openFile(packDesc, PACK);
                try {
-                       // If the block alignment is not yet known, discover it. Prefer the
-                       // larger size from either the cache or the file itself.
-                       int size = blockSize;
-                       if (size == 0) {
-                               size = rc.blockSize();
-                               if (size <= 0)
-                                       size = cache.getBlockSize();
-                               else if (size < cache.getBlockSize())
-                                       size = (cache.getBlockSize() / size) * size;
-                               blockSize = size;
-                               pos = (pos / size) * size;
-                       }
+                       int size = blockSize(rc);
+                       pos = (pos / size) * size;
 
                        // If the size of the file is not yet known, try to discover it.
                        // Channels may choose to return -1 to indicate they don't
@@ -725,7 +830,7 @@ public final class DfsPackFile {
 
                        byte[] buf = new byte[size];
                        rc.position(pos);
-                       int cnt = IO.read(rc, buf, 0, size);
+                       int cnt = read(rc, ByteBuffer.wrap(buf, 0, size));
                        if (cnt != size) {
                                if (0 <= len) {
                                        throw new EOFException(MessageFormat.format(
@@ -754,6 +859,30 @@ public final class DfsPackFile {
                }
        }
 
+       private int blockSize(ReadableChannel rc) {
+               // If the block alignment is not yet known, discover it. Prefer the
+               // larger size from either the cache or the file itself.
+               int size = blockSize;
+               if (size == 0) {
+                       size = rc.blockSize();
+                       if (size <= 0)
+                               size = cache.getBlockSize();
+                       else if (size < cache.getBlockSize())
+                               size = (cache.getBlockSize() / size) * size;
+                       blockSize = size;
+               }
+               return size;
+       }
+
+       private static int read(ReadableChannel rc, ByteBuffer buf)
+                       throws IOException {
+               int n;
+               do {
+                       n = rc.read(buf);
+               } while (0 < n && buf.hasRemaining());
+               return buf.position();
+       }
+
        ObjectLoader load(DfsReader ctx, long pos)
                        throws IOException {
                try {
index 4cf7cbefced10e4102c7da1a5cdf6a2a6e886747..dc8552e49d99c22d9b0edc2ecfe382788e902952 100644 (file)
 
 package org.eclipse.jgit.internal.storage.dfs;
 
-import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
 import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
 
 import java.io.IOException;
-import java.security.MessageDigest;
-import java.text.MessageFormat;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
@@ -65,7 +61,6 @@ import java.util.zip.Inflater;
 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
 import org.eclipse.jgit.errors.MissingObjectException;
 import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
-import org.eclipse.jgit.internal.JGitText;
 import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl;
 import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
 import org.eclipse.jgit.internal.storage.file.PackIndex;
@@ -81,7 +76,6 @@ import org.eclipse.jgit.lib.AsyncObjectLoaderQueue;
 import org.eclipse.jgit.lib.AsyncObjectSizeQueue;
 import org.eclipse.jgit.lib.BitmapIndex;
 import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
-import org.eclipse.jgit.lib.Constants;
 import org.eclipse.jgit.lib.InflaterCache;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectLoader;
@@ -547,52 +541,6 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs {
                return cnt - need;
        }
 
-       void copyPackAsIs(DfsPackFile pack, long length, boolean validate,
-                       PackOutputStream out) throws IOException {
-               MessageDigest md = null;
-               if (validate) {
-                       md = Constants.newMessageDigest();
-                       byte[] buf = out.getCopyBuffer();
-                       pin(pack, 0);
-                       if (block.copy(0, buf, 0, 12) != 12) {
-                               pack.setInvalid();
-                               throw new IOException(MessageFormat.format(
-                                               JGitText.get().packfileIsTruncated, pack.getPackName()));
-                       }
-                       md.update(buf, 0, 12);
-               }
-
-               long position = 12;
-               long remaining = length - (12 + 20);
-               while (0 < remaining) {
-                       pin(pack, position);
-
-                       int ptr = (int) (position - block.start);
-                       int n = (int) Math.min(block.size() - ptr, remaining);
-                       block.write(out, position, n, md);
-                       position += n;
-                       remaining -= n;
-               }
-
-               if (md != null) {
-                       byte[] buf = new byte[20];
-                       byte[] actHash = md.digest();
-
-                       pin(pack, position);
-                       if (block.copy(position, buf, 0, 20) != 20) {
-                               pack.setInvalid();
-                               throw new IOException(MessageFormat.format(
-                                               JGitText.get().packfileIsTruncated, pack.getPackName()));
-                       }
-                       if (!Arrays.equals(actHash, buf)) {
-                               pack.setInvalid();
-                               throw new IOException(MessageFormat.format(
-                                               JGitText.get().packfileCorruptionDetected,
-                                               pack.getPackDescription().getFileName(PACK)));
-                       }
-               }
-       }
-
        /**
         * Inflate a region of the pack starting at {@code position}.
         *
@@ -664,6 +612,10 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs {
                }
        }
 
+       void unpin() {
+               block = null;
+       }
+
        /** Release the current window cursor. */
        @Override
        public void release() {
index 2a625473b967d5ee92d305398535295cce6a3466..84198077eb5c6791bbe056785dd0ea3533b9cc3c 100644 (file)
@@ -46,6 +46,7 @@ package org.eclipse.jgit.internal.storage.dfs;
 import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_CORE_SECTION;
 import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_DFS_SECTION;
 import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_DELTA_BASE_CACHE_LIMIT;
+import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_STREAM_BUFFER;
 import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_STREAM_FILE_TRESHOLD;
 
 import org.eclipse.jgit.lib.Config;
@@ -60,9 +61,10 @@ public class DfsReaderOptions {
        public static final int MiB = 1024 * KiB;
 
        private int deltaBaseCacheLimit;
-
        private int streamFileThreshold;
 
+       private int streamPackBufferSize;
+
        /** Create a default reader configuration. */
        public DfsReaderOptions() {
                setDeltaBaseCacheLimit(10 * MiB);
@@ -104,6 +106,27 @@ public class DfsReaderOptions {
                return this;
        }
 
+       /**
+        * @return number of bytes to use for buffering when streaming a pack file
+        *         during copying. If 0 the block size of the pack is used.
+        * @since 4.0
+        */
+       public int getStreamPackBufferSize() {
+               return streamPackBufferSize;
+       }
+
+       /**
+        * @param bufsz
+        *            new buffer size in bytes for buffers used when streaming pack
+        *            files during copying.
+        * @return {@code this}
+        * @since 4.0
+        */
+       public DfsReaderOptions setStreamPackBufferSize(int bufsz) {
+               streamPackBufferSize = Math.max(0, bufsz);
+               return this;
+       }
+
        /**
         * Update properties by setting fields from the configuration.
         * <p>
@@ -130,6 +153,12 @@ public class DfsReaderOptions {
                sft = Math.min(sft, maxMem / 4); // don't use more than 1/4 of the heap
                sft = Math.min(sft, Integer.MAX_VALUE); // cannot exceed array length
                setStreamFileThreshold((int) sft);
+
+               setStreamPackBufferSize(rc.getInt(
+                               CONFIG_CORE_SECTION,
+                               CONFIG_DFS_SECTION,
+                               CONFIG_KEY_STREAM_BUFFER,
+                               getStreamPackBufferSize()));
                return this;
        }
 }
index 8a2080bac866bfe46257cff4bb4470153ca709a9..a89bcee730b9ab8f3546b1c15e283b70c75a8b63 100644 (file)
@@ -299,4 +299,16 @@ public class ConfigConstants {
         * @since 3.3
         */
        public static final String CONFIG_KEY_PRUNE = "prune";
+
+       /**
+        * The "streamBuffer" key
+        * @since 4.0
+        */
+       public static final String CONFIG_KEY_STREAM_BUFFER = "streamBuffer";
+
+       /**
+        * The "streamRatio" key
+        * @since 4.0
+        */
+       public static final String CONFIG_KEY_STREAM_RATIO = "streamRatio";
 }