diff options
author | Han-Wen Nienhuys <hanwen@google.com> | 2021-09-28 20:05:03 +0200 |
---|---|---|
committer | Han-Wen Nienhuys <hanwen@google.com> | 2021-09-28 20:35:02 +0200 |
commit | 5f8c48413623ea4d1685063582c74a216207ef51 (patch) | |
tree | 7ad4a9862b47965c25e8033f2f762e38e8098e0d | |
parent | b4782d74fdb8e730e06b2fb6c4285fcb4e38439f (diff) | |
download | jgit-5f8c48413623ea4d1685063582c74a216207ef51.tar.gz jgit-5f8c48413623ea4d1685063582c74a216207ef51.zip |
reftable: drop code for truncated reads
The reftable format is a block based format, but allows for variably
sized blocks. This obviously happens for reflog blocks (which are zlib
compressed), but is also accepted for index blocks: In the spec, this
is motivated as
To achieve constant O(1) disk seeks for lookups the index must be
a single level, which is permitted to exceed the file's
configured block size, but not the format's max block size of
15.99 MiB.
Hence, when parsing a block, one cannot be sure of its exact size:
after reading a default-size block (eg. 4kb), the block header may
state that the block is in fact larger.
Before, the code would mark the block as `truncated`, noting
// Its OK during sequential scan for an index block to have been
// partially read and be truncated in-memory. This happens when
// the index block is larger than the file's blockSize. Caller
// will break out of its scan loop once it sees the blockType.
This looks like either
* a remnant of never-implemented functionality. There is no reason to
ever sequentially scan an index block.
* alluding to sequential scan of the data blocks before the index
blocks (eg. scanning refs, which ends when we find the first ref index
block, and we can then ignore the index block).
This comment is followed by code that populates the
restartTbl/restartCnt fields relative to the (possibly truncated)
buffer. If the buffer is truncated, this essentially reads garbage,
leading to OOB array access when using the index block.
Fix this by dropping the truncated logic and issuing a second read if
the first read was short.
Add a test.
We have never observed this failure scenario at Google. We use 64kb
blocksize, which requires us to need fewer index entries. The reftable
spec mentions an Android repo of size 36M. With 64kb blocks, that's
just 562 index entries. Even with historical growth, we are long from
requiring an index whose size exceeds a single block.
When adding the analogous test for seeking refs, there was no failure.
This points to another possibility which is that the code tries to
avoid writing large index blocks for refs.
I did not investigate further which one it is.
Fixes https://bugs.eclipse.org/bugs/show_bug.cgi?id=576250
Bug: 576250
Change-Id: I41ec21fac9e526ef57b3d6fb57b988bd353ee338
Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
3 files changed, 10 insertions, 17 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java index ec8b7593f0..229c753447 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java @@ -836,6 +836,12 @@ public class ReftableTest { } assertFalse(lc.next()); } + + for (Ref exp : refs) { + try (LogCursor lc = t.seekLog(exp.getName())) { + assertTrue("has " + exp.getName(), lc.next()); + } + } } @Test diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java index b66751b94c..6ae7e45efa 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java @@ -92,7 +92,6 @@ import org.eclipse.jgit.util.RawParseUtils; class BlockReader { private byte blockType; private long endPosition; - private boolean truncated; private byte[] buf; private int bufLen; @@ -112,10 +111,6 @@ class BlockReader { return blockType; } - boolean truncated() { - return truncated; - } - long endPosition() { return endPosition; } @@ -331,16 +326,8 @@ class BlockReader { // Log blocks must be inflated after the header. long deflatedSize = inflateBuf(src, pos, blockLen, fileBlockSize); endPosition = pos + 4 + deflatedSize; - } - if (bufLen < blockLen) { - if (blockType != INDEX_BLOCK_TYPE) { - throw invalidBlock(); - } - // Its OK during sequential scan for an index block to have been - // partially read and be truncated in-memory. This happens when - // the index block is larger than the file's blockSize. Caller - // will break out of its scan loop once it sees the blockType. - truncated = true; + } else if (bufLen < blockLen) { + readBlockIntoBuf(src, pos, blockLen); } else if (bufLen > blockLen) { bufLen = blockLen; } @@ -405,7 +392,7 @@ class BlockReader { } void verifyIndex() throws IOException { - if (blockType != INDEX_BLOCK_TYPE || truncated) { + if (blockType != INDEX_BLOCK_TYPE) { throw invalidBlock(); } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java index fef5fa57c6..3b912ea1c2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java @@ -468,7 +468,7 @@ public class ReftableReader extends Reftable implements AutoCloseable { BlockReader b = new BlockReader(); b.readBlock(src, pos, sz); - if (b.type() == INDEX_BLOCK_TYPE && !b.truncated()) { + if (b.type() == INDEX_BLOCK_TYPE) { if (indexCache == null) { indexCache = new LongMap<>(); } |