Browse Source

ApplyCommand: add a stream to apply a delta patch

Add a new BinaryDeltaInputStream that applies a delta provided by
another InputStream to a given base. Because delta application needs
random access to the base, the base itself cannot be yet another
InputStream. But at least this enables streaming of the result.

Add a simple test using delta hunks generated by C git.

Bug: 371725
Change-Id: Ibd26fa2f49860737ad5c5387f7f4870d3e85e628
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
tags/v5.12.0.202105261145-m3
Thomas Wolf 3 years ago
parent
commit
0fe794a433

+ 1
- 0
org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward View File

@@ -0,0 +1 @@
ScmZp0Xmwa1z*+$U3j_csN(Dmz

+ 1
- 0
org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse View File

@@ -0,0 +1 @@
TcmZp5XmD5{u!xa=5hEi28?FP4

+ 103
- 0
org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java View File

@@ -0,0 +1,103 @@
/*
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.util.io;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.zip.InflaterInputStream;

import org.junit.Test;

/**
* Crude tests for the {@link BinaryDeltaInputStream} using delta diffs
* generated by C git.
*/
public class BinaryDeltaInputStreamTest {

private InputStream getBinaryHunk(String name) {
return this.getClass().getResourceAsStream(name);
}

@Test
public void testBinaryDelta() throws Exception {
// Prepare our test data
byte[] data = new byte[8192];
for (int i = 0; i < data.length; i++) {
data[i] = (byte) (255 - (i % 256));
}
// Same, but with five 'x' inserted in the middle.
int middle = data.length / 2;
byte[] newData = new byte[data.length + 5];
System.arraycopy(data, 0, newData, 0, middle);
for (int i = 0; i < 5; i++) {
newData[middle + i] = 'x';
}
System.arraycopy(data, middle, newData, middle + 5, middle);
// delta1.forward has the instructions
// @formatter:off
// COPY 0 4096
// INSERT 5 xxxxx
// COPY 0 4096
// @formatter:on
// Note that the way we built newData could be expressed as
// @formatter:off
// COPY 0 4096
// INSERT 5 xxxxx
// COPY 4096 4096
// @formatter:on
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
BinaryDeltaInputStream input = new BinaryDeltaInputStream(data,
new InflaterInputStream(new BinaryHunkInputStream(
getBinaryHunk("delta1.forward"))))) {
byte[] buf = new byte[1024];
int n;
while ((n = input.read(buf)) >= 0) {
out.write(buf, 0, n);
}
assertArrayEquals(newData, out.toByteArray());
assertTrue(input.isFullyConsumed());
}
// delta1.reverse has the instructions
// @formatter:off
// COPY 0 4096
// COPY 256 3840
// COPY 256 256
// @formatter:on
// Note that there are alternatives, for instance
// @formatter:off
// COPY 0 4096
// COPY 4101 4096
// @formatter:on
// or
// @formatter:off
// COPY 0 4096
// COPY 0 4096
// @formatter:on
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
BinaryDeltaInputStream input = new BinaryDeltaInputStream(
newData,
new InflaterInputStream(new BinaryHunkInputStream(
getBinaryHunk("delta1.reverse"))))) {
long expectedSize = input.getExpectedResultSize();
assertEquals(data.length, expectedSize);
byte[] buf = new byte[1024];
int n;
while ((n = input.read(buf)) >= 0) {
out.write(buf, 0, n);
}
assertArrayEquals(data, out.toByteArray());
assertTrue(input.isFullyConsumed());
}
}
}

+ 3
- 0
org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties View File

@@ -43,6 +43,9 @@ base85overflow=Base-85 value overflow, does not fit into 32 bits: 0x{0}
base85tooLong=Extra base-85 encoded data for output size of {0} bytes
base85tooShort=Base-85 data decoded into less than {0} bytes
baseLengthIncorrect=base length incorrect
binaryDeltaBaseLengthMismatch=Binary delta base length does not match, expected {0}, got {1}
binaryDeltaInvalidOffset=Binary delta offset + length too large: {0} + {1}
binaryDeltaInvalidResultLength=Binary delta expected result length is negative
binaryHunkDecodeError=Binary hunk, line {0}: invalid input
binaryHunkInvalidLength=Binary hunk, line {0}: input corrupt; expected length byte, got 0x{1}
binaryHunkLineTooShort=Binary hunk, line {0}: input ended prematurely

+ 3
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java View File

@@ -71,6 +71,9 @@ public class JGitText extends TranslationBundle {
/***/ public String base85tooLong;
/***/ public String base85tooShort;
/***/ public String baseLengthIncorrect;
/***/ public String binaryDeltaBaseLengthMismatch;
/***/ public String binaryDeltaInvalidOffset;
/***/ public String binaryDeltaInvalidResultLength;
/***/ public String binaryHunkDecodeError;
/***/ public String binaryHunkInvalidLength;
/***/ public String binaryHunkLineTooShort;

+ 206
- 0
org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java View File

@@ -0,0 +1,206 @@
/*
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.util.io;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StreamCorruptedException;
import java.text.MessageFormat;

import org.eclipse.jgit.internal.JGitText;

/**
* An {@link InputStream} that applies a binary delta to a base on the fly.
* <p>
* Delta application to a base needs random access to the base data. The delta
* is expressed as a sequence of copy and insert instructions. A copy
* instruction has the form "COPY fromOffset length" and says "copy length bytes
* from the base, starting at offset fromOffset, to the result". An insert
* instruction has the form "INSERT length" followed by length bytes and says
* "copy the next length bytes from the delta to the result".
* </p>
* <p>
* These instructions are generated using a content-defined chunking algorithm
* (currently C git uses the standard Rabin variant; but there are others that
* could be used) that identifies equal chunks. It is entirely possible that a
* later copy instruction has a fromOffset that is before the fromOffset of an
* earlier copy instruction.
* </p>
* <p>
* This makes it impossible to stream the base.
* </p>
* <p>
* JGit is limited to 2GB maximum size for the base since array indices are
* signed 32bit values.
*
* @since 5.12
*/
public class BinaryDeltaInputStream extends InputStream {

private final byte[] base;

private final InputStream delta;

private long resultLength;

private long toDeliver = -1;

private int fromBase;

private int fromDelta;

private int baseOffset = -1;

/**
* Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
* to {@code base}.
*
* @param base
* data to apply the delta to
* @param delta
* {@link InputStream} delivering the delta to apply
*/
public BinaryDeltaInputStream(byte[] base, InputStream delta) {
this.base = base;
this.delta = delta;
}

@Override
public int read() throws IOException {
int b = readNext();
if (b >= 0) {
toDeliver--;
}
return b;
}

private void initialize() throws IOException {
long baseSize = readVarInt(delta);
if (baseSize > Integer.MAX_VALUE || baseSize < 0
|| (int) baseSize != base.length) {
throw new IOException(MessageFormat.format(
JGitText.get().binaryDeltaBaseLengthMismatch,
Integer.valueOf(base.length), Long.valueOf(baseSize)));
}
resultLength = readVarInt(delta);
if (resultLength < 0) {
throw new StreamCorruptedException(
JGitText.get().binaryDeltaInvalidResultLength);
}
toDeliver = resultLength;
baseOffset = 0;
}

private int readNext() throws IOException {
if (baseOffset < 0) {
initialize();
}
if (fromBase > 0) {
fromBase--;
return base[baseOffset++] & 0xFF;
} else if (fromDelta > 0) {
fromDelta--;
return delta.read();
}
int command = delta.read();
if (command < 0) {
return -1;
}
if ((command & 0x80) != 0) {
// Decode offset and length to read from base
long copyOffset = 0;
for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
if ((command & i) != 0) {
copyOffset |= ((long) next(delta)) << shift;
}
}
int copySize = 0;
for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
if ((command & i) != 0) {
copySize |= next(delta) << shift;
}
}
if (copySize == 0) {
copySize = 0x10000;
}
if (copyOffset > base.length - copySize) {
throw new StreamCorruptedException(MessageFormat.format(
JGitText.get().binaryDeltaInvalidOffset,
Long.valueOf(copyOffset), Integer.valueOf(copySize)));
}
baseOffset = (int) copyOffset;
fromBase = copySize;
return readNext();
} else if (command != 0) {
// The next 'command' bytes come from the delta
fromDelta = command - 1;
return delta.read();
} else {
// Zero is reserved
throw new StreamCorruptedException(
JGitText.get().unsupportedCommand0);
}
}

private int next(InputStream in) throws IOException {
int b = in.read();
if (b < 0) {
throw new EOFException();
}
return b;
}

private long readVarInt(InputStream in) throws IOException {
long val = 0;
int shift = 0;
int b;
do {
b = next(in);
val |= ((long) (b & 0x7f)) << shift;
shift += 7;
} while ((b & 0x80) != 0);
return val;
}

/**
* Tells the expected size of the final result.
*
* @return the size
* @throws IOException
* if the size cannot be determined from {@code delta}
*/
public long getExpectedResultSize() throws IOException {
if (baseOffset < 0) {
initialize();
}
return resultLength;
}

/**
* Tells whether the delta has been fully consumed, and the expected number
* of bytes for the combined result have been read from this
* {@link BinaryDeltaInputStream}.
*
* @return whether delta application was successful
*/
public boolean isFullyConsumed() {
try {
return toDeliver == 0 && delta.read() < 0;
} catch (IOException e) {
return toDeliver == 0;
}
}

@Override
public void close() throws IOException {
delta.close();
}
}

Loading…
Cancel
Save