1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
|
/*
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.util.io;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StreamCorruptedException;
import java.text.MessageFormat;
import org.eclipse.jgit.internal.JGitText;
/**
* An {@link InputStream} that applies a binary delta to a base on the fly.
* <p>
* Delta application to a base needs random access to the base data. The delta
* is expressed as a sequence of copy and insert instructions. A copy
* instruction has the form "COPY fromOffset length" and says "copy length bytes
* from the base, starting at offset fromOffset, to the result". An insert
* instruction has the form "INSERT length" followed by length bytes and says
* "copy the next length bytes from the delta to the result".
* </p>
* <p>
* These instructions are generated using a content-defined chunking algorithm
* (currently C git uses the standard Rabin variant; but there are others that
* could be used) that identifies equal chunks. It is entirely possible that a
* later copy instruction has a fromOffset that is before the fromOffset of an
* earlier copy instruction.
* </p>
* <p>
* This makes it impossible to stream the base.
* </p>
* <p>
* JGit is limited to 2GB maximum size for the base since array indices are
* signed 32bit values.
*
* @since 5.12
*/
public class BinaryDeltaInputStream extends InputStream {
private final byte[] base;
private final InputStream delta;
private long resultLength;
private long toDeliver = -1;
private int fromBase;
private int fromDelta;
private int baseOffset = -1;
/**
* Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
* to {@code base}.
*
* @param base
* data to apply the delta to
* @param delta
* {@link InputStream} delivering the delta to apply
*/
public BinaryDeltaInputStream(byte[] base, InputStream delta) {
this.base = base;
this.delta = delta;
}
@Override
public int read() throws IOException {
int b = readNext();
if (b >= 0) {
toDeliver--;
}
return b;
}
private void initialize() throws IOException {
long baseSize = readVarInt(delta);
if (baseSize > Integer.MAX_VALUE || baseSize < 0
|| (int) baseSize != base.length) {
throw new IOException(MessageFormat.format(
JGitText.get().binaryDeltaBaseLengthMismatch,
Integer.valueOf(base.length), Long.valueOf(baseSize)));
}
resultLength = readVarInt(delta);
if (resultLength < 0) {
throw new StreamCorruptedException(
JGitText.get().binaryDeltaInvalidResultLength);
}
toDeliver = resultLength;
baseOffset = 0;
}
private int readNext() throws IOException {
if (baseOffset < 0) {
initialize();
}
if (fromBase > 0) {
fromBase--;
return base[baseOffset++] & 0xFF;
} else if (fromDelta > 0) {
fromDelta--;
return delta.read();
}
int command = delta.read();
if (command < 0) {
return -1;
}
if ((command & 0x80) != 0) {
// Decode offset and length to read from base
long copyOffset = 0;
for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
if ((command & i) != 0) {
copyOffset |= ((long) next(delta)) << shift;
}
}
int copySize = 0;
for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
if ((command & i) != 0) {
copySize |= next(delta) << shift;
}
}
if (copySize == 0) {
copySize = 0x10000;
}
if (copyOffset > base.length - copySize) {
throw new StreamCorruptedException(MessageFormat.format(
JGitText.get().binaryDeltaInvalidOffset,
Long.valueOf(copyOffset), Integer.valueOf(copySize)));
}
baseOffset = (int) copyOffset;
fromBase = copySize;
return readNext();
} else if (command != 0) {
// The next 'command' bytes come from the delta
fromDelta = command - 1;
return delta.read();
} else {
// Zero is reserved
throw new StreamCorruptedException(
JGitText.get().unsupportedCommand0);
}
}
private int next(InputStream in) throws IOException {
int b = in.read();
if (b < 0) {
throw new EOFException();
}
return b;
}
private long readVarInt(InputStream in) throws IOException {
long val = 0;
int shift = 0;
int b;
do {
b = next(in);
val |= ((long) (b & 0x7f)) << shift;
shift += 7;
} while ((b & 0x80) != 0);
return val;
}
/**
* Tells the expected size of the final result.
*
* @return the size
* @throws IOException
* if the size cannot be determined from {@code delta}
*/
public long getExpectedResultSize() throws IOException {
if (baseOffset < 0) {
initialize();
}
return resultLength;
}
/**
* Tells whether the delta has been fully consumed, and the expected number
* of bytes for the combined result have been read from this
* {@link BinaryDeltaInputStream}.
*
* @return whether delta application was successful
*/
public boolean isFullyConsumed() {
try {
return toDeliver == 0 && delta.read() < 0;
} catch (IOException e) {
return toDeliver == 0;
}
}
@Override
public void close() throws IOException {
delta.close();
}
}
|