From b4782d74fdb8e730e06b2fb6c4285fcb4e38439f Mon Sep 17 00:00:00 2001
From: Han-Wen Nienhuys <hanwen@google.com>
Date: Mon, 27 Sep 2021 16:07:27 +0200
Subject: reftable: pass on invalid object ID in conversion

Before, while trying to determine if an object ID was a tag or not,
the reftable conversion would yield an exception.

Change-Id: I3688a0ffa9e774ba27f320e3840ff8cada21ecf0
---
 .../internal/storage/file/FileReftableTest.java    | 23 ++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

(limited to 'org.eclipse.jgit.test')

diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileReftableTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileReftableTest.java
index 2ffbc6255a..4907073ffe 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileReftableTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileReftableTest.java
@@ -58,7 +58,9 @@ import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.OutputStream;
 import java.security.SecureRandom;
 import java.util.ArrayList;
 import java.util.List;
@@ -162,20 +164,21 @@ public class FileReftableTest extends SampleDataRepositoryTestCase {
 		assertTrue(db.getRefDatabase().hasFastTipsWithSha1());
 	}
 
+
 	@Test
-	public void testConvertToRefdir() throws Exception {
+	public void testConvertBrokenObjectId() throws Exception {
 		db.convertToPackedRefs(false, false);
-		assertTrue(db.getRefDatabase() instanceof RefDirectory);
-		Ref h = db.exactRef("HEAD");
-		assertTrue(h.isSymbolic());
-		assertEquals("refs/heads/master", h.getTarget().getName());
+		new File(db.getDirectory(), "refs/heads").mkdirs();
 
-		Ref b = db.exactRef("refs/heads/b");
-		assertFalse(b.isSymbolic());
-		assertTrue(b.isPeeled());
-		assertEquals(bCommit, b.getObjectId().name());
+		String invalidId = "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef";
+		File headFile = new File(db.getDirectory(), "refs/heads/broken");
+		try (OutputStream os = new FileOutputStream(headFile)) {
+			os.write(Constants.encodeASCII(invalidId + "\n"));
+		}
 
-		assertFalse(db.getRefDatabase().hasFastTipsWithSha1());
+		Ref r = db.exactRef("refs/heads/broken");
+		assertNotNull(r);
+		db.convertToReftable(true, false);
 	}
 
 	@Test
-- 
cgit v1.2.3


From 5f8c48413623ea4d1685063582c74a216207ef51 Mon Sep 17 00:00:00 2001
From: Han-Wen Nienhuys <hanwen@google.com>
Date: Tue, 28 Sep 2021 20:05:03 +0200
Subject: reftable: drop code for truncated reads

The reftable format is a block based format, but allows for variably
sized blocks. This obviously happens for reflog blocks (which are zlib
compressed), but is also accepted for index blocks: In the spec, this
is motivated as

     To achieve constant O(1) disk seeks for lookups the index must be
     a single level, which is permitted to exceed the file's
     configured block size, but not the format's max block size of
     15.99 MiB.

Hence, when parsing a block, one cannot be sure of its exact size:
after reading a default-size block (eg. 4kb), the block header may
state that the block is in fact larger.

Before, the code would mark the block as `truncated`, noting

     // Its OK during sequential scan for an index block to have been
     // partially read and be truncated in-memory. This happens when
     // the index block is larger than the file's blockSize. Caller
     // will break out of its scan loop once it sees the blockType.

This looks like either

* a remnant of never-implemented functionality. There is no reason to
  ever sequentially scan an index block.

* alluding to sequential scan of the data blocks before the index
  blocks (eg. scanning refs, which ends when we find the first ref index
  block, and we can then ignore the index block).

This comment is followed by code that populates the
restartTbl/restartCnt fields relative to the (possibly truncated)
buffer. If the buffer is truncated, this essentially reads garbage,
leading to OOB array access when using the index block.

Fix this by dropping the truncated logic and issuing a second read if
the first read was short.

Add a test.

We have never observed this failure scenario at Google. We use 64kb
blocksize, which requires us to need fewer index entries. The reftable
spec mentions an Android repo of size 36M. With 64kb blocks, that's
just 562 index entries. Even with historical growth, we are long from
requiring an index whose size exceeds a single block.

When adding the analogous test for seeking refs, there was no failure.
This points to another possibility which is that the code tries to
avoid writing large index blocks for refs.

I did not investigate further which one it is.

Fixes https://bugs.eclipse.org/bugs/show_bug.cgi?id=576250

Bug: 576250
Change-Id: I41ec21fac9e526ef57b3d6fb57b988bd353ee338
Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
---
 .../jgit/internal/storage/reftable/ReftableTest.java  |  6 ++++++
 .../jgit/internal/storage/reftable/BlockReader.java   | 19 +++----------------
 .../internal/storage/reftable/ReftableReader.java     |  2 +-
 3 files changed, 10 insertions(+), 17 deletions(-)

(limited to 'org.eclipse.jgit.test')

diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java
index ec8b7593f0..229c753447 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java
@@ -836,6 +836,12 @@ public class ReftableTest {
 			}
 			assertFalse(lc.next());
 		}
+
+		for (Ref exp : refs) {
+ 			try (LogCursor lc = t.seekLog(exp.getName())) {
+				assertTrue("has " + exp.getName(), lc.next());
+			}
+		}
 	}
 
 	@Test
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java
index b66751b94c..6ae7e45efa 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java
@@ -92,7 +92,6 @@ import org.eclipse.jgit.util.RawParseUtils;
 class BlockReader {
 	private byte blockType;
 	private long endPosition;
-	private boolean truncated;
 
 	private byte[] buf;
 	private int bufLen;
@@ -112,10 +111,6 @@ class BlockReader {
 		return blockType;
 	}
 
-	boolean truncated() {
-		return truncated;
-	}
-
 	long endPosition() {
 		return endPosition;
 	}
@@ -331,16 +326,8 @@ class BlockReader {
 			// Log blocks must be inflated after the header.
 			long deflatedSize = inflateBuf(src, pos, blockLen, fileBlockSize);
 			endPosition = pos + 4 + deflatedSize;
-		}
-		if (bufLen < blockLen) {
-			if (blockType != INDEX_BLOCK_TYPE) {
-				throw invalidBlock();
-			}
-			// Its OK during sequential scan for an index block to have been
-			// partially read and be truncated in-memory. This happens when
-			// the index block is larger than the file's blockSize. Caller
-			// will break out of its scan loop once it sees the blockType.
-			truncated = true;
+		} else if (bufLen < blockLen) {
+			readBlockIntoBuf(src, pos, blockLen);
 		} else if (bufLen > blockLen) {
 			bufLen = blockLen;
 		}
@@ -405,7 +392,7 @@ class BlockReader {
 	}
 
 	void verifyIndex() throws IOException {
-		if (blockType != INDEX_BLOCK_TYPE || truncated) {
+		if (blockType != INDEX_BLOCK_TYPE) {
 			throw invalidBlock();
 		}
 	}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java
index fef5fa57c6..3b912ea1c2 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java
@@ -468,7 +468,7 @@ public class ReftableReader extends Reftable implements AutoCloseable {
 
 		BlockReader b = new BlockReader();
 		b.readBlock(src, pos, sz);
-		if (b.type() == INDEX_BLOCK_TYPE && !b.truncated()) {
+		if (b.type() == INDEX_BLOCK_TYPE) {
 			if (indexCache == null) {
 				indexCache = new LongMap<>();
 			}
-- 
cgit v1.2.3