/*
* Copyright (C) 2024, GerritForge Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.midx;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.CHUNK_LOOKUP_WIDTH;
import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_BITMAPPEDPACKS;
import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_LARGEOFFSETS;
import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OBJECTOFFSETS;
import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDFANOUT;
import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDLOOKUP;
import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_PACKNAMES;
import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_REVINDEX;
import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_SIGNATURE;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.NB;
import org.eclipse.jgit.util.io.SilentFileInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The loader returns the representation of the MultiPackIndex file content.
*/
public class MultiPackIndexLoader {
// Logger shared by the static methods of this loader.
private static final Logger LOG = LoggerFactory.getLogger(MultiPackIndexLoader.class);
/**
 * Load a MultiPackIndex from a file on disk.
 * <p>
 * The file is opened, handed to {@link #read(InputStream)} and closed again
 * before this method returns, whether or not parsing succeeded.
 *
 * @param midxFile
 *            existing multi-pack-index to read.
 * @return a copy of the multi-pack-index file in memory
 * @throws FileNotFoundException
 *             the file does not exist.
 * @throws MultiPackIndexFormatException
 *             the file's format is different from what we expected; it is
 *             propagated unchanged from {@link #read(InputStream)}.
 * @throws java.io.IOException
 *             the file exists but could not be read due to security errors
 *             or unexpected data corruption. The original failure is kept
 *             as the cause and the message names the absolute path of the
 *             file.
 */
public static MultiPackIndex open(File midxFile)
		throws FileNotFoundException, MultiPackIndexFormatException,
		IOException {
	// Opening the stream stays outside the inner try on purpose: a failure
	// to open must reach the caller as-is instead of being wrapped below.
	try (SilentFileInputStream in = new SilentFileInputStream(midxFile)) {
		try {
			return read(in);
		} catch (IOException e) {
			if (e instanceof MultiPackIndexFormatException) {
				// Format problems already carry a precise message.
				throw e;
			}
			String msg = MessageFormat.format(JGitText.get().unreadableMIDX,
					midxFile.getAbsolutePath());
			throw new IOException(msg, e);
		}
	}
}
/**
 * Read an existing MultiPackIndex file from a buffered stream.
 * <p>
 * The 12 byte header, the chunk lookup table and then every chunk are read
 * sequentially from the stream. Chunk contents are copied into byte arrays,
 * so the returned instance does not reference the stream. Only version 1
 * files with object id version 1 (SHA-1) are accepted.
 * <p>
 * Chunks with an id this loader does not know are read, logged as a warning
 * and otherwise ignored.
 *
 * @param fd
 *            stream to read the multipack-index file from. The stream must
 *            be buffered as some small IOs are performed against the
 *            stream. The caller is responsible for closing the stream.
 * @return a copy of the MultiPackIndex file in memory
 * @throws MultiPackIndexFormatException
 *             the MultiPackIndex file's format is different from what we
 *             expected: wrong signature, unsupported version or object id
 *             version, chunk offsets that are not ascending, a chunk that
 *             is too large, a repeated or missing mandatory chunk, or a
 *             pack count mismatch.
 * @throws java.io.IOException
 *             the stream cannot be read.
 */
public static MultiPackIndex read(InputStream fd)
		throws MultiPackIndexFormatException, IOException {
	byte[] hdr = new byte[12];
	IO.readFully(fd, hdr, 0, hdr.length);
	int magic = NB.decodeInt32(hdr, 0);
	if (magic != MIDX_SIGNATURE) {
		throw new MultiPackIndexFormatException(JGitText.get().notAMIDX);
	}

	// The one-byte header fields are masked with 0xFF because Java bytes
	// are signed: without the mask any value above 127 turns negative.

	// Check MultiPackIndex version
	int v = hdr[4] & 0xFF;
	if (v != 1) {
		throw new MultiPackIndexFormatException(MessageFormat
				.format(JGitText.get().unsupportedMIDXVersion, v));
	}

	// Read the object Id version (1 byte)
	// 1 => SHA-1
	// 2 => SHA-256
	// TODO: If the hash type does not match the repository's hash
	// algorithm, the multi-pack-index file should be ignored with a
	// warning presented to the user.
	int objectIdVersion = hdr[5] & 0xFF;
	if (objectIdVersion != 1) {
		throw new MultiPackIndexFormatException(
				JGitText.get().incorrectOBJECT_ID_LENGTH);
	}

	// Read the number of "chunkOffsets" (1 byte). A negative count would
	// make the lookup buffer allocation below fail with an unchecked
	// NegativeArraySizeException, hence the mask.
	int chunkCount = hdr[6] & 0xFF;

	// Read the number of multi-pack-index files (1 byte)
	// This value is currently always zero.
	// TODO populate this
	// int numberOfMultiPackIndexFiles = hdr[7];

	// Number of packfiles (4 bytes)
	int packCount = NB.decodeInt32(hdr, 8);

	// The lookup table has one entry per chunk plus a terminating entry.
	byte[] lookupBuffer = new byte[CHUNK_LOOKUP_WIDTH * (chunkCount + 1)];
	IO.readFully(fd, lookupBuffer, 0, lookupBuffer.length);

	List<ChunkSegment> chunks = new ArrayList<>(chunkCount + 1);
	for (int i = 0; i <= chunkCount; i++) {
		// chunks[chunkCount] is just a marker, in order to record the
		// length of the last chunk.
		int entryPos = i * CHUNK_LOOKUP_WIDTH;
		int id = NB.decodeInt32(lookupBuffer, entryPos);
		long offset = NB.decodeInt64(lookupBuffer, entryPos + 4);
		chunks.add(new ChunkSegment(id, offset));
	}

	MultiPackIndexBuilder builder = MultiPackIndexBuilder.builder();
	builder.setPackCount(packCount);
	for (int i = 0; i < chunkCount; i++) {
		ChunkSegment chunk = chunks.get(i);
		int chunkId = chunk.id();
		// A chunk ends where the next lookup entry starts.
		long len = chunks.get(i + 1).offset() - chunk.offset();
		if (len < 0) {
			// Offsets going backwards mean a corrupt lookup table. Report
			// it as a format error rather than letting the allocation
			// below throw NegativeArraySizeException.
			// NOTE(review): no externalized JGitText message for this case
			// is visible from here; consider adding one.
			throw new MultiPackIndexFormatException(
					"Invalid multi-pack-index: chunk " //$NON-NLS-1$
							+ Integer.toHexString(chunkId)
							+ " has negative length " + len); //$NON-NLS-1$
		}
		if (len > Integer.MAX_VALUE - 8) { // http://stackoverflow.com/a/8381338
			throw new MultiPackIndexFormatException(
					JGitText.get().multiPackIndexFileIsTooLargeForJgit);
		}

		byte[] buffer = new byte[(int) len];
		IO.readFully(fd, buffer, 0, buffer.length);

		switch (chunkId) {
		case MIDX_CHUNKID_OIDFANOUT -> builder.addOidFanout(buffer);
		case MIDX_CHUNKID_OIDLOOKUP -> builder.addOidLookUp(buffer);
		case MIDX_CHUNKID_PACKNAMES -> builder.addPackNames(buffer);
		case MIDX_CHUNKID_BITMAPPEDPACKS -> builder.addBitmappedPacks(buffer);
		case MIDX_CHUNKID_OBJECTOFFSETS -> builder.addObjectOffsets(buffer);
		case MIDX_CHUNKID_LARGEOFFSETS -> builder.addObjectLargeOffsets(buffer);
		// The reverse index is a known chunk: hand it to the builder
		// instead of reporting it as unknown.
		case MIDX_CHUNKID_REVINDEX -> builder.addReverseIndex(buffer);
		default -> LOG.warn(MessageFormat.format(
				JGitText.get().midxChunkUnknown,
				Integer.toHexString(chunkId)));
		}
	}
	return builder.build();
}

/**
 * One entry of the chunk lookup table.
 *
 * @param id
 *            4 byte chunk identifier.
 * @param offset
 *            8 byte offset recorded for the chunk; the difference to the
 *            next entry's offset is used as the chunk length.
 */
private record ChunkSegment(int id, long offset) {}
/**
 * Accumulate the byte[] of the different chunks, to build a multipack index.
 * <p>
 * Every chunk may be added at most once; adding a chunk a second time fails
 * with a {@link MultiPackIndexFormatException}. {@link #build()} verifies
 * that the mandatory chunks are present and that the number of pack names
 * matches the pack count given via {@link #setPackCount(int)}.
 */
// Visible for testing
static class MultiPackIndexBuilder {
	private final int hashLength;

	private int packCount;

	private byte[] oidFanout;

	private byte[] oidLookup;

	private String[] packNames;

	private byte[] bitmappedPackfiles;

	private byte[] objectOffsets;

	// Optional
	private byte[] largeObjectOffsets;

	// Optional. Recorded (and checked for repetition) but currently not
	// handed over to the index created by build().
	private byte[] bitmapPackOrder;

	private MultiPackIndexBuilder(int hashLength) {
		this.hashLength = hashLength;
	}

	/**
	 * Create builder
	 *
	 * @return A builder of {@link MultiPackIndex}.
	 */
	static MultiPackIndexBuilder builder() {
		return new MultiPackIndexBuilder(OBJECT_ID_LENGTH);
	}

	/**
	 * Set the number of packs announced by the file header.
	 *
	 * @param packCount
	 *            pack count to compare against the pack names chunk.
	 * @return this builder
	 */
	MultiPackIndexBuilder setPackCount(int packCount) {
		this.packCount = packCount;
		return this;
	}

	/**
	 * Set the content of the OID fanout chunk.
	 *
	 * @param buffer
	 *            raw chunk content.
	 * @return this builder
	 * @throws MultiPackIndexFormatException
	 *             the chunk was already added.
	 */
	MultiPackIndexBuilder addOidFanout(byte[] buffer)
			throws MultiPackIndexFormatException {
		assertChunkNotSeenYet(oidFanout, MIDX_CHUNKID_OIDFANOUT);
		oidFanout = buffer;
		return this;
	}

	/**
	 * Set the content of the OID lookup chunk.
	 *
	 * @param buffer
	 *            raw chunk content.
	 * @return this builder
	 * @throws MultiPackIndexFormatException
	 *             the chunk was already added.
	 */
	MultiPackIndexBuilder addOidLookUp(byte[] buffer)
			throws MultiPackIndexFormatException {
		assertChunkNotSeenYet(oidLookup, MIDX_CHUNKID_OIDLOOKUP);
		oidLookup = buffer;
		return this;
	}

	/**
	 * Set the pack names from the content of the pack names chunk.
	 * <p>
	 * The content is decoded as UTF-8 and split at NUL characters.
	 *
	 * @param buffer
	 *            raw chunk content.
	 * @return this builder
	 * @throws MultiPackIndexFormatException
	 *             the chunk was already added.
	 */
	MultiPackIndexBuilder addPackNames(byte[] buffer)
			throws MultiPackIndexFormatException {
		assertChunkNotSeenYet(packNames, MIDX_CHUNKID_PACKNAMES);
		// String.split drops trailing empty strings, so trailing NULs add
		// no entries. An empty input is the exception: split would return
		// the single empty string, i.e. one bogus pack name, so it is
		// mapped to "no names" explicitly.
		packNames = buffer.length == 0 ? new String[0]
				: new String(buffer, UTF_8).split("\u0000"); //$NON-NLS-1$
		return this;
	}

	/**
	 * Set the content of the bitmapped packs chunk.
	 *
	 * @param buffer
	 *            raw chunk content.
	 * @return this builder
	 * @throws MultiPackIndexFormatException
	 *             the chunk was already added.
	 */
	MultiPackIndexBuilder addBitmappedPacks(byte[] buffer)
			throws MultiPackIndexFormatException {
		assertChunkNotSeenYet(bitmappedPackfiles,
				MIDX_CHUNKID_BITMAPPEDPACKS);
		bitmappedPackfiles = buffer;
		return this;
	}

	/**
	 * Set the content of the object offsets chunk.
	 *
	 * @param buffer
	 *            raw chunk content.
	 * @return this builder
	 * @throws MultiPackIndexFormatException
	 *             the chunk was already added.
	 */
	MultiPackIndexBuilder addObjectOffsets(byte[] buffer)
			throws MultiPackIndexFormatException {
		assertChunkNotSeenYet(objectOffsets, MIDX_CHUNKID_OBJECTOFFSETS);
		objectOffsets = buffer;
		return this;
	}

	/**
	 * Set the content of the (optional) large offsets chunk.
	 *
	 * @param buffer
	 *            raw chunk content.
	 * @return this builder
	 * @throws MultiPackIndexFormatException
	 *             the chunk was already added.
	 */
	MultiPackIndexBuilder addObjectLargeOffsets(byte[] buffer)
			throws MultiPackIndexFormatException {
		assertChunkNotSeenYet(largeObjectOffsets,
				MIDX_CHUNKID_LARGEOFFSETS);
		largeObjectOffsets = buffer;
		return this;
	}

	/**
	 * Set the content of the (optional) reverse index chunk.
	 *
	 * @param buffer
	 *            raw chunk content.
	 * @return this builder
	 * @throws MultiPackIndexFormatException
	 *             the chunk was already added.
	 */
	MultiPackIndexBuilder addReverseIndex(byte[] buffer)
			throws MultiPackIndexFormatException {
		assertChunkNotSeenYet(bitmapPackOrder, MIDX_CHUNKID_REVINDEX);
		bitmapPackOrder = buffer;
		return this;
	}

	/**
	 * Create the index from the chunks collected so far.
	 *
	 * @return the multi-pack-index backed by the collected chunks.
	 * @throws MultiPackIndexFormatException
	 *             a mandatory chunk (OID fanout, OID lookup, pack names,
	 *             object offsets) is missing, or the number of pack names
	 *             differs from the pack count.
	 */
	MultiPackIndex build() throws MultiPackIndexFormatException {
		assertChunkNotNull(oidFanout, MIDX_CHUNKID_OIDFANOUT);
		assertChunkNotNull(oidLookup, MIDX_CHUNKID_OIDLOOKUP);
		assertChunkNotNull(packNames, MIDX_CHUNKID_PACKNAMES);
		assertChunkNotNull(objectOffsets, MIDX_CHUNKID_OBJECTOFFSETS);

		assertPackCounts(packCount, packNames.length);
		return new MultiPackIndexV1(hashLength, oidFanout, oidLookup,
				packNames, bitmappedPackfiles, objectOffsets,
				largeObjectOffsets);
	}

	// Fails when a mandatory chunk has not been added.
	private static void assertChunkNotNull(Object object, int chunkId)
			throws MultiPackIndexFormatException {
		if (object == null) {
			throw new MultiPackIndexFormatException(
					MessageFormat.format(JGitText.get().midxChunkNeeded,
							Integer.toHexString(chunkId)));
		}
	}

	// Fails when a chunk is added for the second time.
	private static void assertChunkNotSeenYet(Object object, int chunkId)
			throws MultiPackIndexFormatException {
		if (object != null) {
			throw new MultiPackIndexFormatException(
					MessageFormat.format(JGitText.get().midxChunkRepeated,
							Integer.toHexString(chunkId)));
		}
	}

	// Fails when the header pack count and the number of names disagree.
	private static void assertPackCounts(int headerCount,
			int packfileNamesCount) throws MultiPackIndexFormatException {
		if (headerCount != packfileNamesCount) {
			throw new MultiPackIndexFormatException(MessageFormat.format(
					JGitText.get().multiPackIndexPackCountMismatch,
					headerCount, packfileNamesCount));
		}
	}
}
/**
 * Signals that the content of a MultiPackIndex file differs from the format
 * we expected.
 */
public static class MultiPackIndexFormatException extends IOException {

	private static final long serialVersionUID = 1L;

	/**
	 * Create the exception with an explanatory message.
	 *
	 * @param why
	 *            description of the type of error.
	 */
	MultiPackIndexFormatException(String why) {
		super(why);
	}
}
}