summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Wolf <thomas.wolf@paranor.ch>2021-03-07 18:49:01 +0100
committerMatthias Sohn <matthias.sohn@sap.com>2021-05-26 00:37:59 +0200
commit0fe794a433d54504de066ae119b5835ab69c1c54 (patch)
tree16816bdb86f8f7397bb299efb3a530f80f537785
parent2eb54afe6a5cb5dd2a108285ad1676b7798d5a15 (diff)
downloadjgit-0fe794a433d54504de066ae119b5835ab69c1c54.tar.gz
jgit-0fe794a433d54504de066ae119b5835ab69c1c54.zip
ApplyCommand: add a stream to apply a delta patch
Add a new BinaryDeltaInputStream that applies a delta provided by another InputStream to a given base. Because delta application needs random access to the base, the base itself cannot be yet another InputStream. But at least this enables streaming of the result. Add a simple test using delta hunks generated by C git. Bug: 371725 Change-Id: Ibd26fa2f49860737ad5c5387f7f4870d3e85e628 Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch> Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
-rw-r--r--org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward1
-rw-r--r--org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse1
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java103
-rw-r--r--org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties3
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java3
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java206
6 files changed, 317 insertions, 0 deletions
diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward
new file mode 100644
index 0000000000..878b167ae9
--- /dev/null
+++ b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward
@@ -0,0 +1 @@
+ScmZp0Xmwa1z*+$U3j_csN(Dmz
diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse
new file mode 100644
index 0000000000..7ff7a08ad0
--- /dev/null
+++ b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse
@@ -0,0 +1 @@
+TcmZp5XmD5{u!xa=5hEi28?FP4
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java
new file mode 100644
index 0000000000..d9297fcd9c
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.util.io;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.util.zip.InflaterInputStream;
+
+import org.junit.Test;
+
+/**
+ * Crude tests for the {@link BinaryDeltaInputStream} using delta diffs
+ * generated by C git.
+ */
+public class BinaryDeltaInputStreamTest {
+
+ private InputStream getBinaryHunk(String name) {
+ return this.getClass().getResourceAsStream(name);
+ }
+
+ @Test
+ public void testBinaryDelta() throws Exception {
+ // Prepare our test data
+ byte[] data = new byte[8192];
+ for (int i = 0; i < data.length; i++) {
+ data[i] = (byte) (255 - (i % 256));
+ }
+ // Same, but with five 'x' inserted in the middle.
+ int middle = data.length / 2;
+ byte[] newData = new byte[data.length + 5];
+ System.arraycopy(data, 0, newData, 0, middle);
+ for (int i = 0; i < 5; i++) {
+ newData[middle + i] = 'x';
+ }
+ System.arraycopy(data, middle, newData, middle + 5, middle);
+ // delta1.forward has the instructions
+ // @formatter:off
+ // COPY 0 4096
+ // INSERT 5 xxxxx
+ // COPY 0 4096
+ // @formatter:on
+ // Note that the way we built newData could be expressed as
+ // @formatter:off
+ // COPY 0 4096
+ // INSERT 5 xxxxx
+ // COPY 4096 4096
+ // @formatter:on
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream();
+ BinaryDeltaInputStream input = new BinaryDeltaInputStream(data,
+ new InflaterInputStream(new BinaryHunkInputStream(
+ getBinaryHunk("delta1.forward"))))) {
+ byte[] buf = new byte[1024];
+ int n;
+ while ((n = input.read(buf)) >= 0) {
+ out.write(buf, 0, n);
+ }
+ assertArrayEquals(newData, out.toByteArray());
+ assertTrue(input.isFullyConsumed());
+ }
+ // delta1.reverse has the instructions
+ // @formatter:off
+ // COPY 0 4096
+ // COPY 256 3840
+ // COPY 256 256
+ // @formatter:on
+ // Note that there are alternatives, for instance
+ // @formatter:off
+ // COPY 0 4096
+ // COPY 4101 4096
+ // @formatter:on
+ // or
+ // @formatter:off
+ // COPY 0 4096
+ // COPY 0 4096
+ // @formatter:on
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream();
+ BinaryDeltaInputStream input = new BinaryDeltaInputStream(
+ newData,
+ new InflaterInputStream(new BinaryHunkInputStream(
+ getBinaryHunk("delta1.reverse"))))) {
+ long expectedSize = input.getExpectedResultSize();
+ assertEquals(data.length, expectedSize);
+ byte[] buf = new byte[1024];
+ int n;
+ while ((n = input.read(buf)) >= 0) {
+ out.write(buf, 0, n);
+ }
+ assertArrayEquals(data, out.toByteArray());
+ assertTrue(input.isFullyConsumed());
+ }
+ }
+}
diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
index f8c9ea0dcc..8b6872d337 100644
--- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
+++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
@@ -43,6 +43,9 @@ base85overflow=Base-85 value overflow, does not fit into 32 bits: 0x{0}
base85tooLong=Extra base-85 encoded data for output size of {0} bytes
base85tooShort=Base-85 data decoded into less than {0} bytes
baseLengthIncorrect=base length incorrect
+binaryDeltaBaseLengthMismatch=Binary delta base length does not match, expected {0}, got {1}
+binaryDeltaInvalidOffset=Binary delta offset + length too large: {0} + {1}
+binaryDeltaInvalidResultLength=Binary delta expected result length is negative
binaryHunkDecodeError=Binary hunk, line {0}: invalid input
binaryHunkInvalidLength=Binary hunk, line {0}: input corrupt; expected length byte, got 0x{1}
binaryHunkLineTooShort=Binary hunk, line {0}: input ended prematurely
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
index 85b40cb6dc..3b67b0f628 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
@@ -71,6 +71,9 @@ public class JGitText extends TranslationBundle {
/***/ public String base85tooLong;
/***/ public String base85tooShort;
/***/ public String baseLengthIncorrect;
+ /***/ public String binaryDeltaBaseLengthMismatch;
+ /***/ public String binaryDeltaInvalidOffset;
+ /***/ public String binaryDeltaInvalidResultLength;
/***/ public String binaryHunkDecodeError;
/***/ public String binaryHunkInvalidLength;
/***/ public String binaryHunkLineTooShort;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java
new file mode 100644
index 0000000000..7f0d87f0da
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.util.io;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StreamCorruptedException;
+import java.text.MessageFormat;
+
+import org.eclipse.jgit.internal.JGitText;
+
+/**
+ * An {@link InputStream} that applies a binary delta to a base on the fly.
+ * <p>
+ * Delta application to a base needs random access to the base data. The delta
+ * is expressed as a sequence of copy and insert instructions. A copy
+ * instruction has the form "COPY fromOffset length" and says "copy length bytes
+ * from the base, starting at offset fromOffset, to the result". An insert
+ * instruction has the form "INSERT length" followed by length bytes and says
+ * "copy the next length bytes from the delta to the result".
+ * </p>
+ * <p>
+ * These instructions are generated using a content-defined chunking algorithm
+ * (currently C git uses the standard Rabin variant; but there are others that
+ * could be used) that identifies equal chunks. It is entirely possible that a
+ * later copy instruction has a fromOffset that is before the fromOffset of an
+ * earlier copy instruction.
+ * </p>
+ * <p>
+ * This makes it impossible to stream the base.
+ * </p>
+ * <p>
+ * JGit is limited to 2GB maximum size for the base since array indices are
+ * signed 32bit values.
+ *
+ * @since 5.12
+ */
+public class BinaryDeltaInputStream extends InputStream {
+
+ private final byte[] base;
+
+ private final InputStream delta;
+
+ private long resultLength;
+
+ private long toDeliver = -1;
+
+ private int fromBase;
+
+ private int fromDelta;
+
+ private int baseOffset = -1;
+
+ /**
+ * Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
+ * to {@code base}.
+ *
+ * @param base
+ * data to apply the delta to
+ * @param delta
+ * {@link InputStream} delivering the delta to apply
+ */
+ public BinaryDeltaInputStream(byte[] base, InputStream delta) {
+ this.base = base;
+ this.delta = delta;
+ }
+
+ @Override
+ public int read() throws IOException {
+ int b = readNext();
+ if (b >= 0) {
+ toDeliver--;
+ }
+ return b;
+ }
+
+ private void initialize() throws IOException {
+ long baseSize = readVarInt(delta);
+ if (baseSize > Integer.MAX_VALUE || baseSize < 0
+ || (int) baseSize != base.length) {
+ throw new IOException(MessageFormat.format(
+ JGitText.get().binaryDeltaBaseLengthMismatch,
+ Integer.valueOf(base.length), Long.valueOf(baseSize)));
+ }
+ resultLength = readVarInt(delta);
+ if (resultLength < 0) {
+ throw new StreamCorruptedException(
+ JGitText.get().binaryDeltaInvalidResultLength);
+ }
+ toDeliver = resultLength;
+ baseOffset = 0;
+ }
+
+ private int readNext() throws IOException {
+ if (baseOffset < 0) {
+ initialize();
+ }
+ if (fromBase > 0) {
+ fromBase--;
+ return base[baseOffset++] & 0xFF;
+ } else if (fromDelta > 0) {
+ fromDelta--;
+ return delta.read();
+ }
+ int command = delta.read();
+ if (command < 0) {
+ return -1;
+ }
+ if ((command & 0x80) != 0) {
+ // Decode offset and length to read from base
+ long copyOffset = 0;
+ for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
+ if ((command & i) != 0) {
+ copyOffset |= ((long) next(delta)) << shift;
+ }
+ }
+ int copySize = 0;
+ for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
+ if ((command & i) != 0) {
+ copySize |= next(delta) << shift;
+ }
+ }
+ if (copySize == 0) {
+ copySize = 0x10000;
+ }
+ if (copyOffset > base.length - copySize) {
+ throw new StreamCorruptedException(MessageFormat.format(
+ JGitText.get().binaryDeltaInvalidOffset,
+ Long.valueOf(copyOffset), Integer.valueOf(copySize)));
+ }
+ baseOffset = (int) copyOffset;
+ fromBase = copySize;
+ return readNext();
+ } else if (command != 0) {
+ // The next 'command' bytes come from the delta
+ fromDelta = command - 1;
+ return delta.read();
+ } else {
+ // Zero is reserved
+ throw new StreamCorruptedException(
+ JGitText.get().unsupportedCommand0);
+ }
+ }
+
+ private int next(InputStream in) throws IOException {
+ int b = in.read();
+ if (b < 0) {
+ throw new EOFException();
+ }
+ return b;
+ }
+
+ private long readVarInt(InputStream in) throws IOException {
+ long val = 0;
+ int shift = 0;
+ int b;
+ do {
+ b = next(in);
+ val |= ((long) (b & 0x7f)) << shift;
+ shift += 7;
+ } while ((b & 0x80) != 0);
+ return val;
+ }
+
+ /**
+ * Tells the expected size of the final result.
+ *
+ * @return the size
+ * @throws IOException
+ * if the size cannot be determined from {@code delta}
+ */
+ public long getExpectedResultSize() throws IOException {
+ if (baseOffset < 0) {
+ initialize();
+ }
+ return resultLength;
+ }
+
+ /**
+ * Tells whether the delta has been fully consumed, and the expected number
+ * of bytes for the combined result have been read from this
+ * {@link BinaryDeltaInputStream}.
+ *
+ * @return whether delta application was successful
+ */
+ public boolean isFullyConsumed() {
+ try {
+ return toDeliver == 0 && delta.read() < 0;
+ } catch (IOException e) {
+ return toDeliver == 0;
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ delta.close();
+ }
+}