From 84c71ac93350e9d1e781efeaeab64e4277d585bc Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Mon, 3 Jul 2017 17:48:34 -0700 Subject: [PATCH] Extract BlockBasedFile base class for DfsPackFile This new base class has the minimum set of properties and methods necessary for DfsBlockCache to manage blocks of a file in the cache. Subclasses can use DfsBlockCache for any content. This refactoring opens the door for additional PackExt types other than PACK to be stored on a block-by-block basis by the DfsBlockCache. Change-Id: I307228fc805c3ff0c596783beb24fd52bec35ba8 --- .../internal/storage/dfs/BlockBasedFile.java | 207 ++++++++++++++++++ .../jgit/internal/storage/dfs/DfsBlock.java | 2 +- .../internal/storage/dfs/DfsBlockCache.java | 26 +-- .../internal/storage/dfs/DfsPackFile.java | 173 ++------------- 4 files changed, 234 insertions(+), 174 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/BlockBasedFile.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/BlockBasedFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/BlockBasedFile.java new file mode 100644 index 0000000000..befab94fde --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/BlockBasedFile.java @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.dfs; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.text.MessageFormat; + +import org.eclipse.jgit.annotations.Nullable; +import org.eclipse.jgit.errors.PackInvalidException; +import org.eclipse.jgit.internal.storage.pack.PackExt; + +/** Block based file stored in {@link DfsBlockCache}. 
*/ +public abstract class BlockBasedFile { + /** Cache that owns this file and its data. */ + final DfsBlockCache cache; + + /** Unique identity of this file while in-memory. */ + final DfsStreamKey key; + + /** Description of the associated pack file's storage. */ + final DfsPackDescription packDesc; + final PackExt ext; + + /** + * Preferred alignment for loading blocks from the backing file. + *

+ * It is initialized to 0 and filled in on the first read made from the + * file. Block sizes may be odd, e.g. 4091, caused by the underlying DFS + * storing 4091 user bytes and 5 bytes block metadata into a lower level + * 4096 byte block on disk. + */ + volatile int blockSize; + + /** + * Total number of bytes in this pack file. + *

+ * This field initializes to -1 and gets populated when a block is loaded. + */ + volatile long length; + + /** True once corruption has been detected that cannot be worked around. */ + volatile boolean invalid; + + BlockBasedFile(DfsBlockCache cache, DfsStreamKey key, + DfsPackDescription packDesc, PackExt ext) { + this.cache = cache; + this.key = key; + this.packDesc = packDesc; + this.ext = ext; + } + + String getFileName() { + return packDesc.getFileName(ext); + } + + boolean invalid() { + return invalid; + } + + void setInvalid() { + invalid = true; + } + + void setBlockSize(int newSize) { + blockSize = newSize; + } + + long alignToBlock(long pos) { + int size = blockSize; + if (size == 0) + size = cache.getBlockSize(); + return (pos / size) * size; + } + + int blockSize(ReadableChannel rc) { + // If the block alignment is not yet known, discover it. Prefer the + // larger size from either the cache or the file itself. + int size = blockSize; + if (size == 0) { + size = rc.blockSize(); + if (size <= 0) + size = cache.getBlockSize(); + else if (size < cache.getBlockSize()) + size = (cache.getBlockSize() / size) * size; + blockSize = size; + } + return size; + } + + DfsBlock readOneBlock(long pos, DfsReader ctx, + @Nullable ReadableChannel fileChannel) throws IOException { + if (invalid) + throw new PackInvalidException(getFileName()); + + ctx.stats.readBlock++; + long start = System.nanoTime(); + ReadableChannel rc = fileChannel != null ? fileChannel + : ctx.db.openFile(packDesc, ext); + try { + int size = blockSize(rc); + pos = (pos / size) * size; + + // If the size of the file is not yet known, try to discover it. + // Channels may choose to return -1 to indicate they don't + // know the length yet, in this case read up to the size unit + // given by the caller, then recheck the length. 
+ long len = length; + if (len < 0) { + len = rc.size(); + if (0 <= len) + length = len; + } + + if (0 <= len && len < pos + size) + size = (int) (len - pos); + if (size <= 0) + throw new EOFException(MessageFormat.format( + DfsText.get().shortReadOfBlock, Long.valueOf(pos), + getFileName(), Long.valueOf(0), Long.valueOf(0))); + + byte[] buf = new byte[size]; + rc.position(pos); + int cnt = read(rc, ByteBuffer.wrap(buf, 0, size)); + ctx.stats.readBlockBytes += cnt; + if (cnt != size) { + if (0 <= len) { + throw new EOFException(MessageFormat.format( + DfsText.get().shortReadOfBlock, Long.valueOf(pos), + getFileName(), Integer.valueOf(size), + Integer.valueOf(cnt))); + } + + // Assume the entire thing was read in a single shot, compact + // the buffer to only the space required. + byte[] n = new byte[cnt]; + System.arraycopy(buf, 0, n, 0, n.length); + buf = n; + } else if (len < 0) { + // With no length at the start of the read, the channel should + // have the length available at the end. + length = len = rc.size(); + } + + return new DfsBlock(key, pos, buf); + } finally { + if (rc != fileChannel) { + rc.close(); + } + ctx.stats.readBlockMicros += elapsedMicros(start); + } + } + + static int read(ReadableChannel rc, ByteBuffer buf) throws IOException { + int n; + do { + n = rc.read(buf); + } while (0 < n && buf.hasRemaining()); + return buf.position(); + } + + static long elapsedMicros(long start) { + return (System.nanoTime() - start) / 1000L; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlock.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlock.java index 7ac5f25f1a..6667c2fc23 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlock.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlock.java @@ -52,7 +52,7 @@ import java.util.zip.Inflater; import org.eclipse.jgit.internal.storage.pack.PackOutputStream; -/** A cached slice of a {@link DfsPackFile}. 
*/ +/** A cached slice of a {@link BlockBasedFile}. */ final class DfsBlock { final DfsStreamKey stream; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlockCache.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlockCache.java index 232c763e92..6d46d3a4ab 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlockCache.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlockCache.java @@ -57,12 +57,12 @@ import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.internal.JGitText; /** - * Caches slices of a {@link DfsPackFile} in memory for faster read access. + * Caches slices of a {@link BlockBasedFile} in memory for faster read access. *

* The DfsBlockCache serves as a Java based "buffer cache", loading segments of - * a DfsPackFile into the JVM heap prior to use. As JGit often wants to do reads - * of only tiny slices of a file, the DfsBlockCache tries to smooth out these - * tiny reads into larger block-sized IO operations. + * a BlockBasedFile into the JVM heap prior to use. As JGit often wants to do + * reads of only tiny slices of a file, the DfsBlockCache tries to smooth out + * these tiny reads into larger block-sized IO operations. *

* Whenever a cache miss occurs, loading is invoked by exactly one thread for * the given (DfsPackKey,position) key tuple. This is ensured by an @@ -303,24 +303,24 @@ public final class DfsBlockCache { /** * Lookup a cached object, creating and loading it if it doesn't exist. * - * @param pack + * @param file * the pack that "contains" the cached object. * @param position * offset within pack of the object. * @param ctx * current thread's reader. - * @param packChannel + * @param fileChannel * optional channel to read {@code pack}. * @return the object reference. * @throws IOException * the reference was not in the cache and could not be loaded. */ - DfsBlock getOrLoad(DfsPackFile pack, long position, DfsReader ctx, - @Nullable ReadableChannel packChannel) throws IOException { + DfsBlock getOrLoad(BlockBasedFile file, long position, DfsReader ctx, + @Nullable ReadableChannel fileChannel) throws IOException { final long requestedPosition = position; - position = pack.alignToBlock(position); + position = file.alignToBlock(position); - DfsStreamKey key = pack.key; + DfsStreamKey key = file.key; int slot = slot(key, position); HashEntry e1 = table.get(slot); DfsBlock v = scan(e1, key, position); @@ -348,7 +348,7 @@ public final class DfsBlockCache { statMiss.incrementAndGet(); boolean credit = true; try { - v = pack.readOneBlock(position, ctx, packChannel); + v = file.readOneBlock(position, ctx, fileChannel); credit = false; } finally { if (credit) @@ -377,9 +377,9 @@ public final class DfsBlockCache { // If the block size changed from the default, it is possible the block // that was loaded is the wrong block for the requested position. 
- if (v.contains(pack.key, requestedPosition)) + if (v.contains(file.key, requestedPosition)) return v; - return getOrLoad(pack, requestedPosition, ctx, packChannel); + return getOrLoad(file, requestedPosition, ctx, fileChannel); } @SuppressWarnings("unchecked") diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java index 2f10cd31d0..d35db0f8b0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java @@ -62,7 +62,6 @@ import java.util.zip.CRC32; import java.util.zip.DataFormatException; import java.util.zip.Inflater; -import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; @@ -89,38 +88,10 @@ import org.eclipse.jgit.util.LongList; * delta packed format yielding high compression of lots of object where some * objects are similar. */ -public final class DfsPackFile { - /** Cache that owns this pack file and its data. */ - private final DfsBlockCache cache; - - /** Description of the pack file's storage. */ - private final DfsPackDescription packDesc; - - /** Unique identity of this pack while in-memory. */ - final DfsStreamKey key; +public final class DfsPackFile extends BlockBasedFile { final DfsStreamKey idxKey = new DfsStreamKey(); final DfsStreamKey reverseIdxKey = new DfsStreamKey(); - final DfsStreamKey bitmapKey = new DfsStreamKey(); - - /** - * Total number of bytes in this pack file. - *

- * This field initializes to -1 and gets populated when a block is loaded. - */ - volatile long length; - - /** - * Preferred alignment for loading blocks from the backing file. - *

- * It is initialized to 0 and filled in on the first read made from the - * file. Block sizes may be odd, e.g. 4091, caused by the underling DFS - * storing 4091 user bytes and 5 bytes block metadata into a lower level - * 4096 byte block on disk. - */ - private volatile int blockSize; - - /** True once corruption has been detected that cannot be worked around. */ - private volatile boolean invalid; + DfsStreamKey bitmapKey; /** * Lock for initialization of {@link #index} and {@link #corruptObjects}. @@ -158,10 +129,7 @@ public final class DfsPackFile { * interned key used to identify blocks in the block cache. */ DfsPackFile(DfsBlockCache cache, DfsPackDescription desc, DfsStreamKey key) { - this.cache = cache; - this.packDesc = desc; - this.key = key; - + super(cache, key, desc, PACK); length = desc.getFileSize(PACK); if (length <= 0) length = -1; @@ -185,14 +153,6 @@ public final class DfsPackFile { return key.cachedSize.get(); } - String getPackName() { - return packDesc.getFileName(PACK); - } - - void setBlockSize(int newSize) { - blockSize = newSize; - } - void setPackIndex(PackIndex idx) { long objCnt = idx.getObjectCount(); int recSize = Constants.OBJECT_ID_LENGTH + 8; @@ -223,7 +183,7 @@ public final class DfsPackFile { } if (invalid) - throw new PackInvalidException(getPackName()); + throw new PackInvalidException(getFileName()); Repository.getGlobalListenerList() .dispatch(new BeforeDfsPackIndexLoadedEvent(this)); @@ -276,10 +236,6 @@ public final class DfsPackFile { } } - private static long elapsedMicros(long start) { - return (System.nanoTime() - start) / 1000L; - } - final boolean isGarbage() { return packDesc.getPackSource() == UNREACHABLE_GARBAGE; } @@ -304,7 +260,9 @@ public final class DfsPackFile { if (idx != null) return idx; } - + if (bitmapKey == null) { + bitmapKey = new DfsStreamKey(); + } long size; PackBitmapIndex idx; try { @@ -650,7 +608,7 @@ public final class DfsPackFile { setCorrupt(src.offset); throw new 
CorruptObjectException(MessageFormat.format( JGitText.get().objectAtHasBadZlibStream, - Long.valueOf(src.offset), getPackName())); + Long.valueOf(src.offset), getFileName())); } } else if (validate) { assert(crc1 != null); @@ -692,7 +650,7 @@ public final class DfsPackFile { CorruptObjectException corruptObject = new CorruptObjectException( MessageFormat.format( JGitText.get().objectAtHasBadZlibStream, - Long.valueOf(src.offset), getPackName())); + Long.valueOf(src.offset), getFileName())); corruptObject.initCause(dataFormat); StoredObjectRepresentationNotAvailableException gone; @@ -754,24 +712,16 @@ public final class DfsPackFile { if (crc2.getValue() != expectedCRC) { throw new CorruptObjectException(MessageFormat.format( JGitText.get().objectAtHasBadZlibStream, - Long.valueOf(src.offset), getPackName())); + Long.valueOf(src.offset), getFileName())); } } } } - boolean invalid() { - return invalid; - } - - void setInvalid() { - invalid = true; - } - private IOException packfileIsTruncated() { invalid = true; return new IOException(MessageFormat.format( - JGitText.get().packfileIsTruncated, getPackName())); + JGitText.get().packfileIsTruncated, getFileName())); } private void readFully(long position, byte[] dstbuf, int dstoff, int cnt, @@ -780,107 +730,10 @@ public final class DfsPackFile { throw new EOFException(); } - long alignToBlock(long pos) { - int size = blockSize; - if (size == 0) - size = cache.getBlockSize(); - return (pos / size) * size; - } - DfsBlock getOrLoadBlock(long pos, DfsReader ctx) throws IOException { return cache.getOrLoad(this, pos, ctx, null); } - DfsBlock readOneBlock(long pos, DfsReader ctx, - @Nullable ReadableChannel packChannel) throws IOException { - if (invalid) - throw new PackInvalidException(getPackName()); - - ctx.stats.readBlock++; - long start = System.nanoTime(); - ReadableChannel rc = packChannel != null - ? 
packChannel - : ctx.db.openFile(packDesc, PACK); - try { - int size = blockSize(rc); - pos = (pos / size) * size; - - // If the size of the file is not yet known, try to discover it. - // Channels may choose to return -1 to indicate they don't - // know the length yet, in this case read up to the size unit - // given by the caller, then recheck the length. - long len = length; - if (len < 0) { - len = rc.size(); - if (0 <= len) - length = len; - } - - if (0 <= len && len < pos + size) - size = (int) (len - pos); - if (size <= 0) - throw new EOFException(MessageFormat.format( - DfsText.get().shortReadOfBlock, Long.valueOf(pos), - getPackName(), Long.valueOf(0), Long.valueOf(0))); - - byte[] buf = new byte[size]; - rc.position(pos); - int cnt = read(rc, ByteBuffer.wrap(buf, 0, size)); - ctx.stats.readBlockBytes += cnt; - if (cnt != size) { - if (0 <= len) { - throw new EOFException(MessageFormat.format( - DfsText.get().shortReadOfBlock, - Long.valueOf(pos), - getPackName(), - Integer.valueOf(size), - Integer.valueOf(cnt))); - } - - // Assume the entire thing was read in a single shot, compact - // the buffer to only the space required. - byte[] n = new byte[cnt]; - System.arraycopy(buf, 0, n, 0, n.length); - buf = n; - } else if (len < 0) { - // With no length at the start of the read, the channel should - // have the length available at the end. - length = len = rc.size(); - } - - return new DfsBlock(key, pos, buf); - } finally { - if (rc != packChannel) { - rc.close(); - } - ctx.stats.readBlockMicros += elapsedMicros(start); - } - } - - private int blockSize(ReadableChannel rc) { - // If the block alignment is not yet known, discover it. Prefer the - // larger size from either the cache or the file itself. 
- int size = blockSize; - if (size == 0) { - size = rc.blockSize(); - if (size <= 0) - size = cache.getBlockSize(); - else if (size < cache.getBlockSize()) - size = (cache.getBlockSize() / size) * size; - blockSize = size; - } - return size; - } - - private static int read(ReadableChannel rc, ByteBuffer buf) - throws IOException { - int n; - do { - n = rc.read(buf); - } while (0 < n && buf.hasRemaining()); - return buf.position(); - } - ObjectLoader load(DfsReader ctx, long pos) throws IOException { try { @@ -1017,7 +870,7 @@ public final class DfsPackFile { CorruptObjectException coe = new CorruptObjectException( MessageFormat.format( JGitText.get().objectAtHasBadZlibStream, Long.valueOf(pos), - getPackName())); + getFileName())); coe.initCause(dfe); throw coe; } @@ -1165,7 +1018,7 @@ public final class DfsPackFile { CorruptObjectException coe = new CorruptObjectException( MessageFormat.format( JGitText.get().objectAtHasBadZlibStream, Long.valueOf(pos), - getPackName())); + getFileName())); coe.initCause(dfe); throw coe; } -- 2.39.5