Browse Source

Add dfs fsck implementation

JGit already has some fsck-like classes, such as ObjectChecker, which can
check an individual object.

Add the read-only FsckPackParser, which parses all objects within a pack
file and checks them with ObjectChecker. It also checks the pack index
file against the object information from the pack parser.

Change-Id: Ifd8e0d28eb68ff0b8edd2b51b2fa3a50a544c855
Signed-off-by: Zhen Chen <czhen@google.com>
tags/v4.9.0.201710071750-r
Zhen Chen 7 years ago
parent
commit
2c2999643f

+ 23
- 0
org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/JGitTestUtil.java View File

@@ -258,4 +258,27 @@ public abstract class JGitTestUtil {
target);
}

/**
 * Join several byte arrays into a single array.
 *
 * @param b
 *            the byte arrays to join, in the order they should appear.
 * @return a newly allocated array holding the bytes of every input array,
 *         laid out back to back.
 * @since 4.9
 */
public static byte[] concat(byte[]... b) {
	// First pass: work out how much room the combined array needs.
	int total = 0;
	for (int i = 0; i < b.length; i++) {
		total += b[i].length;
	}

	// Second pass: copy each chunk behind the previously copied ones.
	byte[] joined = new byte[total];
	int writeAt = 0;
	for (int i = 0; i < b.length; i++) {
		byte[] chunk = b[i];
		System.arraycopy(chunk, 0, joined, writeAt, chunk.length);
		writeAt += chunk.length;
	}
	return joined;
}
}

+ 1
- 0
org.eclipse.jgit.test/META-INF/MANIFEST.MF View File

@@ -23,6 +23,7 @@ Import-Package: com.googlecode.javaewah;version="[1.1.6,2.0.0)",
org.eclipse.jgit.ignore;version="[4.9.0,4.10.0)",
org.eclipse.jgit.ignore.internal;version="[4.9.0,4.10.0)",
org.eclipse.jgit.internal;version="[4.9.0,4.10.0)",
org.eclipse.jgit.internal.fsck;version="[4.9.0,4.10.0)",
org.eclipse.jgit.internal.storage.dfs;version="[4.9.0,4.10.0)",
org.eclipse.jgit.internal.storage.file;version="[4.9.0,4.10.0)",
org.eclipse.jgit.internal.storage.pack;version="[4.9.0,4.10.0)",

+ 201
- 0
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsFsckTest.java View File

@@ -0,0 +1,201 @@
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.junit.JGitTestUtil.concat;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import static org.eclipse.jgit.lib.Constants.encodeASCII;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;

import org.eclipse.jgit.internal.fsck.FsckError;
import org.eclipse.jgit.internal.fsck.FsckError.CorruptObject;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectChecker.ErrorType;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.revwalk.RevCommit;
import org.junit.Before;
import org.junit.Test;

public class DfsFsckTest {
private TestRepository<InMemoryRepository> git;

private InMemoryRepository repo;

private ObjectInserter ins;

@Before
public void setUp() throws IOException {
DfsRepositoryDescription desc = new DfsRepositoryDescription("test");
git = new TestRepository<>(new InMemoryRepository(desc));
repo = git.getRepository();
ins = repo.newObjectInserter();
}

@Test
public void testHealthyRepo() throws Exception {
RevCommit commit0 = git.commit().message("0").create();
RevCommit commit1 = git.commit().message("1").parent(commit0).create();
git.update("master", commit1);

DfsFsck fsck = new DfsFsck(repo);
FsckError errors = fsck.check(null);

assertEquals(errors.getCorruptObjects().size(), 0);
assertEquals(errors.getMissingObjects().size(), 0);
assertEquals(errors.getCorruptIndices().size(), 0);
}

@Test
public void testCommitWithCorruptAuthor() throws Exception {
StringBuilder b = new StringBuilder();
b.append("tree be9bfa841874ccc9f2ef7c48d0c76226f89b7189\n");
b.append("author b <b@c> <b@c> 0 +0000\n");
b.append("committer <> 0 +0000\n");
byte[] data = encodeASCII(b.toString());
ObjectId id = ins.insert(Constants.OBJ_COMMIT, data);
ins.flush();

DfsFsck fsck = new DfsFsck(repo);
FsckError errors = fsck.check(null);

assertEquals(errors.getCorruptObjects().size(), 1);
CorruptObject o = errors.getCorruptObjects().iterator().next();
assertTrue(o.getId().equals(id));
assertEquals(o.getErrorType(), ErrorType.BAD_DATE);
}

@Test
public void testCommitWithoutTree() throws Exception {
StringBuilder b = new StringBuilder();
b.append("parent ");
b.append("be9bfa841874ccc9f2ef7c48d0c76226f89b7189");
b.append('\n');
byte[] data = encodeASCII(b.toString());
ObjectId id = ins.insert(Constants.OBJ_COMMIT, data);
ins.flush();

DfsFsck fsck = new DfsFsck(repo);
FsckError errors = fsck.check(null);

assertEquals(errors.getCorruptObjects().size(), 1);
CorruptObject o = errors.getCorruptObjects().iterator().next();
assertTrue(o.getId().equals(id));
assertEquals(o.getErrorType(), ErrorType.MISSING_TREE);
}

@Test
public void testTagWithoutObject() throws Exception {
StringBuilder b = new StringBuilder();
b.append("type commit\n");
b.append("tag test-tag\n");
b.append("tagger A. U. Thor <author@localhost> 1 +0000\n");
byte[] data = encodeASCII(b.toString());
ObjectId id = ins.insert(Constants.OBJ_TAG, data);
ins.flush();

DfsFsck fsck = new DfsFsck(repo);
FsckError errors = fsck.check(null);

assertEquals(errors.getCorruptObjects().size(), 1);
CorruptObject o = errors.getCorruptObjects().iterator().next();
assertTrue(o.getId().equals(id));
assertEquals(o.getErrorType(), ErrorType.MISSING_OBJECT);
}

@Test
public void testTreeWithNullSha() throws Exception {
byte[] data = concat(encodeASCII("100644 A"), new byte[] { '\0' },
new byte[OBJECT_ID_LENGTH]);
ObjectId id = ins.insert(Constants.OBJ_TREE, data);
ins.flush();

DfsFsck fsck = new DfsFsck(repo);
FsckError errors = fsck.check(null);

assertEquals(errors.getCorruptObjects().size(), 1);
CorruptObject o = errors.getCorruptObjects().iterator().next();
assertTrue(o.getId().equals(id));
assertEquals(o.getErrorType(), ErrorType.NULL_SHA1);
}

@Test
public void testMultipleInvalidObjects() throws Exception {
StringBuilder b = new StringBuilder();
b.append("tree ");
b.append("be9bfa841874ccc9f2ef7c48d0c76226f89b7189");
b.append('\n');
b.append("parent ");
b.append("\n");
byte[] data = encodeASCII(b.toString());
ObjectId id1 = ins.insert(Constants.OBJ_COMMIT, data);

b = new StringBuilder();
b.append("100644");
data = encodeASCII(b.toString());
ObjectId id2 = ins.insert(Constants.OBJ_TREE, data);

ins.flush();

DfsFsck fsck = new DfsFsck(repo);
FsckError errors = fsck.check(null);

assertEquals(errors.getCorruptObjects().size(), 2);
for (CorruptObject o : errors.getCorruptObjects()) {
if (o.getId().equals(id1)) {
assertEquals(o.getErrorType(), ErrorType.BAD_PARENT_SHA1);
} else if (o.getId().equals(id2)) {
assertNull(o.getErrorType());
} else {
fail();
}
}
}
}

+ 1
- 13
org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectCheckerTest.java View File

@@ -45,6 +45,7 @@
package org.eclipse.jgit.lib;

import static java.lang.Integer.valueOf;
import static org.eclipse.jgit.junit.JGitTestUtil.concat;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import static org.eclipse.jgit.lib.Constants.OBJ_BAD;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
@@ -1054,20 +1055,7 @@ public class ObjectCheckerTest {
checker.checkTree(data);
}

// Join the given byte arrays, in order, into one newly allocated array.
private static byte[] concat(byte[]... b) {
	int size = 0;
	for (byte[] part : b) {
		size = size + part.length;
	}

	byte[] out = new byte[size];
	int cursor = 0;
	int idx = 0;
	while (idx < b.length) {
		byte[] part = b[idx++];
		System.arraycopy(part, 0, out, cursor, part.length);
		cursor = cursor + part.length;
	}
	return out;
}

@Test
public void testInvalidTreeNameIsMacHFSGitCorruptUTF8AtEnd()

+ 1
- 0
org.eclipse.jgit/META-INF/MANIFEST.MF View File

@@ -59,6 +59,7 @@ Export-Package: org.eclipse.jgit.annotations;version="4.9.0",
org.eclipse.jgit.ignore;version="4.9.0",
org.eclipse.jgit.ignore.internal;version="4.9.0";x-friends:="org.eclipse.jgit.test",
org.eclipse.jgit.internal;version="4.9.0";x-friends:="org.eclipse.jgit.test,org.eclipse.jgit.http.test",
org.eclipse.jgit.internal.fsck;version="4.9.0";x-friends:="org.eclipse.jgit.test",
org.eclipse.jgit.internal.ketch;version="4.9.0";x-friends:="org.eclipse.jgit.junit,org.eclipse.jgit.test,org.eclipse.jgit.pgm",
org.eclipse.jgit.internal.storage.dfs;version="4.9.0";
x-friends:="org.eclipse.jgit.test,

+ 4
- 0
org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties View File

@@ -409,8 +409,11 @@ mergeRecursiveReturnedNoCommit=Merge returned no commit:\n Depth {0}\n Head one
mergeRecursiveTooManyMergeBasesFor = "More than {0} merge bases for:\n a {1}\n b {2} found:\n count {3}"
messageAndTaggerNotAllowedInUnannotatedTags = Unannotated tags cannot have a message or tagger
minutesAgo={0} minutes ago
mismatchOffset=mismatch offset for object {0}
mismatchCRC=mismatch CRC for object {0}
missingAccesskey=Missing accesskey.
missingConfigurationForKey=No value for key {0} found in configuration
missingCRC=missing CRC for object {0}
missingDeltaBase=delta base
missingForwardImageInGITBinaryPatch=Missing forward-image in GIT binary patch
missingObject=Missing {0} {1}
@@ -667,6 +670,7 @@ unknownDIRCVersion=Unknown DIRC version {0}
unknownHost=unknown host
unknownIndexVersionOrCorruptIndex=Unknown index version (or corrupt index): {0}
unknownObject=unknown object
unknownObjectInIndex=unknown object {0} found in index but not in pack file
unknownObjectType=Unknown object type {0}.
unknownObjectType2=unknown
unknownRepositoryFormat=Unknown repository format

+ 94
- 0
org.eclipse.jgit/src/org/eclipse/jgit/errors/CorruptPackIndexException.java View File

@@ -0,0 +1,94 @@
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.errors;

import org.eclipse.jgit.annotations.Nullable;

/**
 * Exception thrown when a corrupt pack index file is encountered.
 *
 * @since 4.9
 */
public class CorruptPackIndexException extends Exception {
	private static final long serialVersionUID = 1L;

	/** The error type of a corrupt index file. */
	public enum ErrorType {
		/** Offset recorded in the index does not match the pack file. */
		MISMATCH_OFFSET,
		/** CRC does not match CRC of the object data in pack file. */
		MISMATCH_CRC,
		/** CRC is not present in index file. */
		MISSING_CRC,
		/** Object in pack is not present in index file. */
		MISSING_OBJ,
		/** Object in index file is not present in pack file. */
		UNKNOWN_OBJ,
	}

	// Assigned once in the constructor and never changed afterwards.
	private final ErrorType errorType;

	/**
	 * Report a specific error condition discovered in an index file.
	 *
	 * @param message
	 *            the error message.
	 * @param errorType
	 *            the error type of corruption.
	 */
	public CorruptPackIndexException(String message, ErrorType errorType) {
		super(message);
		this.errorType = errorType;
	}

	/**
	 * Get the specific reason the index file is considered corrupt.
	 *
	 * @return error condition or null.
	 */
	@Nullable
	public ErrorType getErrorType() {
		return errorType;
	}
}

+ 4
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java View File

@@ -468,8 +468,11 @@ public class JGitText extends TranslationBundle {
/***/ public String mergeRecursiveTooManyMergeBasesFor;
/***/ public String messageAndTaggerNotAllowedInUnannotatedTags;
/***/ public String minutesAgo;
/***/ public String mismatchOffset;
/***/ public String mismatchCRC;
/***/ public String missingAccesskey;
/***/ public String missingConfigurationForKey;
/***/ public String missingCRC;
/***/ public String missingDeltaBase;
/***/ public String missingForwardImageInGITBinaryPatch;
/***/ public String missingObject;
@@ -726,6 +729,7 @@ public class JGitText extends TranslationBundle {
/***/ public String unknownHost;
/***/ public String unknownIndexVersionOrCorruptIndex;
/***/ public String unknownObject;
/***/ public String unknownObjectInIndex;
/***/ public String unknownObjectType;
/***/ public String unknownObjectType2;
/***/ public String unknownRepositoryFormat;

+ 145
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckError.java View File

@@ -0,0 +1,145 @@
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.internal.fsck;

import java.util.HashSet;
import java.util.Set;

import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.CorruptPackIndexException;
import org.eclipse.jgit.errors.CorruptPackIndexException.ErrorType;
import org.eclipse.jgit.lib.ObjectChecker;
import org.eclipse.jgit.lib.ObjectId;

/**
 * Holds all fsck errors of a git repository.
 * <p>
 * The sets returned by the getters are the live, mutable sets owned by this
 * object; callers add findings to them directly. {@link CorruptObject} and
 * {@link CorruptIndex} do not override {@code equals}/{@code hashCode}, so the
 * sets distinguish entries by object identity.
 */
public class FsckError {
	/** Represents a corrupt object. */
	public static class CorruptObject {
		final ObjectId id;

		final int type;

		// Stays null unless setErrorType is called.
		ObjectChecker.ErrorType errorType;

		/**
		 * @param id
		 *            the object identifier.
		 * @param type
		 *            type of the object.
		 */
		public CorruptObject(ObjectId id, int type) {
			this.id = id;
			this.type = type;
		}

		void setErrorType(ObjectChecker.ErrorType errorType) {
			this.errorType = errorType;
		}

		/** @return identifier of the object. */
		public ObjectId getId() {
			return id;
		}

		/** @return type of the object. */
		public int getType() {
			return type;
		}

		/** @return error type of the corruption, or null if none was set. */
		@Nullable
		public ObjectChecker.ErrorType getErrorType() {
			return errorType;
		}
	}

	/** Represents a corrupt pack index file. */
	public static class CorruptIndex {
		// Both fields are only ever assigned by the constructor.
		final String fileName;

		final CorruptPackIndexException.ErrorType errorType;

		/**
		 * @param fileName
		 *            the file name of the pack index.
		 * @param errorType
		 *            the type of error as reported in
		 *            {@link CorruptPackIndexException}.
		 */
		public CorruptIndex(String fileName, ErrorType errorType) {
			this.fileName = fileName;
			this.errorType = errorType;
		}

		/** @return the file name of the index file. */
		public String getFileName() {
			return fileName;
		}

		/**
		 * @return the error type of the corruption; null when the value
		 *         passed to the constructor was null.
		 */
		@Nullable
		public ErrorType getErrorType() {
			return errorType;
		}
	}

	private final Set<CorruptObject> corruptObjects = new HashSet<>();

	private final Set<ObjectId> missingObjects = new HashSet<>();

	private final Set<CorruptIndex> corruptIndices = new HashSet<>();

	/** @return corrupt objects from all pack files. */
	public Set<CorruptObject> getCorruptObjects() {
		return corruptObjects;
	}

	/** @return missing objects that should be present in pack files. */
	public Set<ObjectId> getMissingObjects() {
		return missingObjects;
	}

	/** @return corrupt index files associated with the packs. */
	public Set<CorruptIndex> getCorruptIndices() {
		return corruptIndices;
	}
}

+ 326
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java View File

@@ -0,0 +1,326 @@
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.internal.fsck;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.text.MessageFormat;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.CRC32;

import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.CorruptPackIndexException;
import org.eclipse.jgit.errors.CorruptPackIndexException.ErrorType;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.fsck.FsckError.CorruptObject;
import org.eclipse.jgit.internal.storage.dfs.ReadableChannel;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectChecker;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.transport.PackParser;
import org.eclipse.jgit.transport.PackedObjectInfo;

/**
 * A read-only pack parser for object validity checking.
 * <p>
 * The parser keeps a running CRC32 over each object's header and data bytes,
 * collects objects rejected by the configured {@link ObjectChecker} instead
 * of aborting, and can compare the parsed objects against a pack index.
 */
public class FsckPackParser extends PackParser {
	/** Block size used when the channel does not report a positive one. */
	private static final int DEFAULT_BLOCK_SIZE = 65536;

	private final CRC32 crc;

	private final ReadableChannel channel;

	private final Set<CorruptObject> corruptObjects = new HashSet<>();

	private final int blockSize;

	// Negative means "trust the object count found in the pack header".
	private long expectedObjectCount = -1L;

	// Channel position that the next readDatabase() call reads from.
	private long offset;

	/**
	 * @param db
	 *            the object database which stores repository's data.
	 * @param channel
	 *            readable channel of the pack file.
	 */
	public FsckPackParser(ObjectDatabase db, ReadableChannel channel) {
		super(db, Channels.newInputStream(channel));
		this.channel = channel;
		setCheckObjectCollisions(false);
		this.crc = new CRC32();
		int channelBlockSize = channel.blockSize();
		this.blockSize = channelBlockSize > 0 ? channelBlockSize
				: DEFAULT_BLOCK_SIZE;
	}

	@Override
	protected void onPackHeader(long objCnt) throws IOException {
		if (expectedObjectCount >= 0) {
			// Some DFS pack files don't contain the correct object count, e.g.
			// INSERT/RECEIVE packs don't always contain the correct object
			// count in their headers. Overwrite the expected object count
			// after parsing the pack header.
			setExpectedObjectCount(expectedObjectCount);
		}
	}

	@Override
	protected void onBeginWholeObject(long streamPosition, int type,
			long inflatedSize) throws IOException {
		// A new object starts: restart the running checksum.
		crc.reset();
	}

	@Override
	protected void onObjectHeader(Source src, byte[] raw, int pos, int len)
			throws IOException {
		crc.update(raw, pos, len);
	}

	@Override
	protected void onObjectData(Source src, byte[] raw, int pos, int len)
			throws IOException {
		crc.update(raw, pos, len);
	}

	@Override
	protected void onEndWholeObject(PackedObjectInfo info) throws IOException {
		// Record the checksum accumulated over this object's bytes.
		info.setCRC((int) crc.getValue());
	}

	@Override
	protected void onBeginOfsDelta(long deltaStreamPosition,
			long baseStreamPosition, long inflatedSize) throws IOException {
		crc.reset();
	}

	@Override
	protected void onBeginRefDelta(long deltaStreamPosition, AnyObjectId baseId,
			long inflatedSize) throws IOException {
		crc.reset();
	}

	@Override
	protected UnresolvedDelta onEndDelta() throws IOException {
		UnresolvedDelta delta = new UnresolvedDelta();
		delta.setCRC((int) crc.getValue());
		return delta;
	}

	@Override
	protected void onInflatedObjectData(PackedObjectInfo obj, int typeCode,
			byte[] data) throws IOException {
		// FsckPackParser ignores this event.
	}

	@Override
	protected void verifySafeObject(final AnyObjectId id, final int type,
			final byte[] data) {
		try {
			super.verifySafeObject(id, type, data);
		} catch (CorruptObjectException e) {
			// Record the corruption and keep parsing the rest of the pack.
			CorruptObject o = new CorruptObject(id.toObjectId(), type);
			// The error type may be null; CorruptObject's field is null by
			// default, so passing it through unconditionally is equivalent.
			o.setErrorType(e.getErrorType());
			corruptObjects.add(o);
		}
	}

	@Override
	protected void onPackFooter(byte[] hash) throws IOException {
		// Do nothing.
	}

	@Override
	protected boolean onAppendBase(int typeCode, byte[] data,
			PackedObjectInfo info) throws IOException {
		// Do nothing.
		return false;
	}

	@Override
	protected void onEndThinPack() throws IOException {
		// Do nothing.
	}

	@Override
	protected ObjectTypeAndSize seekDatabase(PackedObjectInfo obj,
			ObjectTypeAndSize info) throws IOException {
		crc.reset();
		offset = obj.getOffset();
		return readObjectHeader(info);
	}

	@Override
	protected ObjectTypeAndSize seekDatabase(UnresolvedDelta delta,
			ObjectTypeAndSize info) throws IOException {
		crc.reset();
		offset = delta.getOffset();
		return readObjectHeader(info);
	}

	@Override
	protected int readDatabase(byte[] dst, int pos, int cnt)
			throws IOException {
		// read from input instead of database.
		int n = read(offset, dst, pos, cnt);
		if (n > 0) {
			offset += n;
		}
		return n;
	}

	/**
	 * Copy bytes from the pack channel, starting at an absolute position.
	 * <p>
	 * At most the remainder of the block containing {@code channelPosition}
	 * is copied, so fewer than {@code cnt} bytes may be returned.
	 *
	 * @param channelPosition
	 *            absolute position in the channel to read from.
	 * @param dst
	 *            destination buffer.
	 * @param pos
	 *            first index in {@code dst} to write to.
	 * @param cnt
	 *            maximum number of bytes to copy.
	 * @return number of bytes copied, or -1 if nothing could be read at
	 *         that position.
	 * @throws IOException
	 *             if the channel cannot be read.
	 */
	int read(long channelPosition, byte[] dst, int pos, int cnt)
			throws IOException {
		long block = channelPosition / blockSize;
		byte[] bytes = readFromChannel(block);
		if (bytes == null) {
			return -1;
		}
		// Named to avoid shadowing the "offset" field used by readDatabase().
		int blockOffset = (int) (channelPosition - block * blockSize);
		int bytesToCopy = Math.min(cnt, bytes.length - blockOffset);
		if (bytesToCopy < 1) {
			return -1;
		}
		System.arraycopy(bytes, blockOffset, dst, pos, bytesToCopy);
		return bytesToCopy;
	}

	/**
	 * Read one block from the channel.
	 *
	 * @param block
	 *            index of the block; the read starts at
	 *            {@code block * blockSize}.
	 * @return the block's bytes (shorter than {@code blockSize} when the
	 *         channel ends inside the block), or null when no bytes are
	 *         available at the block's start.
	 * @throws IOException
	 *             if the channel cannot be positioned or read.
	 */
	private byte[] readFromChannel(long block) throws IOException {
		channel.position(block * blockSize);
		ByteBuffer buf = ByteBuffer.allocate(blockSize);
		int totalBytesRead = 0;
		while (totalBytesRead < blockSize) {
			int bytesRead = channel.read(buf);
			if (bytesRead == -1) {
				if (totalBytesRead == 0) {
					return null;
				}
				return Arrays.copyOf(buf.array(), totalBytesRead);
			}
			totalBytesRead += bytesRead;
		}
		return buf.array();
	}

	@Override
	protected boolean checkCRC(int oldCRC) {
		return oldCRC == (int) crc.getValue();
	}

	@Override
	protected void onStoreStream(byte[] raw, int pos, int len)
			throws IOException {
		// Do nothing.
	}

	/**
	 * @return corrupt objects that were reported by {@link ObjectChecker}.
	 */
	public Set<CorruptObject> getCorruptObjects() {
		return corruptObjects;
	}

	/**
	 * Verify the existing index file with all objects from the pack.
	 * <p>
	 * Verification stops at the first problem found.
	 *
	 * @param entries
	 *            all the entries that are expected in the index file
	 * @param idx
	 *            index file associate with the pack
	 * @throws CorruptPackIndexException
	 *             when the index file is corrupt.
	 */
	public void verifyIndex(List<PackedObjectInfo> entries, PackIndex idx)
			throws CorruptPackIndexException {
		Set<String> all = new HashSet<>();
		for (PackedObjectInfo entry : entries) {
			all.add(entry.getName());
			// Named to avoid shadowing the "offset" field.
			long indexedOffset = idx.findOffset(entry);
			if (indexedOffset == -1) {
				throw new CorruptPackIndexException(
						MessageFormat.format(JGitText.get().missingObject,
								entry.getType(), entry.getName()),
						ErrorType.MISSING_OBJ);
			} else if (indexedOffset != entry.getOffset()) {
				throw new CorruptPackIndexException(MessageFormat
						.format(JGitText.get().mismatchOffset, entry.getName()),
						ErrorType.MISMATCH_OFFSET);
			}

			try {
				if (idx.hasCRC32Support()
						&& (int) idx.findCRC32(entry) != entry.getCRC()) {
					throw new CorruptPackIndexException(
							MessageFormat.format(JGitText.get().mismatchCRC,
									entry.getName()),
							ErrorType.MISMATCH_CRC);
				}
			} catch (MissingObjectException e) {
				throw new CorruptPackIndexException(MessageFormat
						.format(JGitText.get().missingCRC, entry.getName()),
						ErrorType.MISSING_CRC);
			}
		}

		// Second direction: the index must not list objects the pack lacks.
		for (MutableEntry entry : idx) {
			if (!all.contains(entry.name())) {
				throw new CorruptPackIndexException(MessageFormat.format(
						JGitText.get().unknownObjectInIndex, entry.name()),
						ErrorType.UNKNOWN_OBJ);
			}
		}
	}

	/**
	 * Set the object count for overwriting the expected object count from pack
	 * header.
	 *
	 * @param expectedObjectCount
	 *            the actual expected object count.
	 */
	public void overwriteObjectCount(long expectedObjectCount) {
		this.expectedObjectCount = expectedObjectCount;
	}
}

+ 4
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/package-info.java View File

@@ -0,0 +1,4 @@
/**
* Git fsck support.
*/
package org.eclipse.jgit.internal.fsck;

+ 134
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java View File

@@ -0,0 +1,134 @@
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.internal.storage.dfs;

import java.io.IOException;
import java.util.List;

import org.eclipse.jgit.errors.CorruptPackIndexException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.fsck.FsckError;
import org.eclipse.jgit.internal.fsck.FsckError.CorruptIndex;
import org.eclipse.jgit.internal.fsck.FsckPackParser;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.ObjectChecker;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.transport.PackedObjectInfo;

/**
 * Verify the validity of the objects and pack indexes of a DFS repository.
 * <p>
 * Every pack of the repository is parsed with {@link FsckPackParser}; the
 * findings are collected into a {@link FsckError}.
 */
public class DfsFsck {
	private final DfsRepository repo;

	private final DfsObjDatabase objdb;

	private ObjectChecker objChecker = new ObjectChecker();

	/**
	 * Initialize DFS fsck.
	 *
	 * @param repository
	 *            the dfs repository to check.
	 */
	public DfsFsck(DfsRepository repository) {
		repo = repository;
		objdb = repo.getObjectDatabase();
	}

	/**
	 * Verify the integrity of all objects in the object database and of the
	 * pack indexes that describe them.
	 * <p>
	 * A reader is opened for each call and closed before returning, so this
	 * method may be called more than once on the same instance.
	 *
	 * @param pm
	 *            callback to provide progress feedback during the check.
	 * @return all errors about the repository.
	 * @throws IOException
	 *             if encounters IO errors during the process.
	 */
	public FsckError check(ProgressMonitor pm) throws IOException {
		FsckError errors = new FsckError();
		// Open the reader here rather than in the constructor so it cannot
		// leak when check() is never called, and is never reused once closed.
		try (DfsReader ctx = objdb.newReader()) {
			for (DfsPackFile pack : objdb.getPacks()) {
				DfsPackDescription packDesc = pack.getPackDescription();
				try (ReadableChannel channel = objdb.openFile(packDesc,
						PackExt.PACK)) {
					List<PackedObjectInfo> objectsInPack;
					FsckPackParser parser = new FsckPackParser(objdb, channel);
					parser.setObjectChecker(objChecker);
					// Prefer the count from the pack description over the
					// one stored in the pack header.
					parser.overwriteObjectCount(packDesc.getObjectCount());
					parser.parse(pm);
					errors.getCorruptObjects()
							.addAll(parser.getCorruptObjects());
					objectsInPack = parser.getSortedObjectList(null);
					parser.verifyIndex(objectsInPack, pack.getPackIndex(ctx));
				} catch (MissingObjectException e) {
					errors.getMissingObjects().add(e.getObjectId());
				} catch (CorruptPackIndexException e) {
					errors.getCorruptIndices().add(new CorruptIndex(
							packDesc.getFileName(PackExt.INDEX),
							e.getErrorType()));
				}
			}
		}
		return errors;
	}

	/**
	 * Use a customized object checker instead of the default one. Caller can
	 * specify a skip list to ignore some errors.
	 *
	 * @param objChecker
	 *            A customized object checker.
	 */
	public void setObjectChecker(ObjectChecker objChecker) {
		this.objChecker = objChecker;
	}
}

+ 49
- 32
org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java View File

@@ -550,29 +550,7 @@ public abstract class PackParser {
}

if (deltaCount > 0) {
if (resolving instanceof BatchingProgressMonitor) {
((BatchingProgressMonitor) resolving).setDelayStart(
1000,
TimeUnit.MILLISECONDS);
}
resolving.beginTask(JGitText.get().resolvingDeltas, deltaCount);
resolveDeltas(resolving);
if (entryCount < expectedObjectCount) {
if (!isAllowThin()) {
throw new IOException(MessageFormat.format(
JGitText.get().packHasUnresolvedDeltas,
Long.valueOf(expectedObjectCount - entryCount)));
}

resolveDeltasWithExternalBases(resolving);

if (entryCount < expectedObjectCount) {
throw new IOException(MessageFormat.format(
JGitText.get().packHasUnresolvedDeltas,
Long.valueOf(expectedObjectCount - entryCount)));
}
}
resolving.endTask();
processDeltas(resolving);
}

packDigest = null;
@@ -595,6 +573,31 @@ public abstract class PackParser {
return null; // By default there is no locking.
}

/**
 * Run delta resolution for the pack under a progress task.
 * <p>
 * After {@code resolveDeltas} has run, if fewer entries than expected were
 * produced and thin packs are allowed,
 * {@code resolveDeltasWithExternalBases} is tried as well. Any entries still
 * outstanding afterwards cause an {@link IOException}; in that case the
 * progress task is not ended.
 *
 * @param resolving
 *            progress monitor used to report delta resolution.
 * @throws IOException
 *             if the pack still has unresolved deltas, or resolution fails.
 */
private void processDeltas(ProgressMonitor resolving) throws IOException {
	if (resolving instanceof BatchingProgressMonitor) {
		BatchingProgressMonitor batching = (BatchingProgressMonitor) resolving;
		batching.setDelayStart(1000, TimeUnit.MILLISECONDS);
	}
	resolving.beginTask(JGitText.get().resolvingDeltas, deltaCount);
	resolveDeltas(resolving);

	boolean unresolved = entryCount < expectedObjectCount;
	if (unresolved && isAllowThin()) {
		// Only a thin pack gets a second attempt.
		resolveDeltasWithExternalBases(resolving);
		unresolved = entryCount < expectedObjectCount;
	}
	if (unresolved) {
		throw new IOException(MessageFormat.format(
				JGitText.get().packHasUnresolvedDeltas,
				Long.valueOf(expectedObjectCount - entryCount)));
	}
	resolving.endTask();
}

private void resolveDeltas(final ProgressMonitor progress)
throws IOException {
final int last = entryCount;
@@ -684,6 +687,7 @@ public abstract class PackParser {
PackedObjectInfo oe;
oe = newInfo(tempObjectId, visit.delta, visit.parent.id);
oe.setOffset(visit.delta.position);
oe.setType(type);
onInflatedObjectData(oe, type, visit.data);
addObjectAndTrack(oe);
visit.id = oe;
@@ -854,10 +858,9 @@ public abstract class PackParser {
visit.id = baseId;
final int typeCode = ldr.getType();
final PackedObjectInfo oe = newInfo(baseId, null, null);
oe.setType(typeCode);
if (onAppendBase(typeCode, visit.data, oe))
entries[entryCount++] = oe;

visit.nextChild = firstChildOf(oe);
resolveDeltas(visit.next(), typeCode,
new ObjectTypeAndSize(), progress);
@@ -1059,6 +1062,7 @@ public abstract class PackParser {

PackedObjectInfo obj = newInfo(tempObjectId, null, null);
obj.setOffset(pos);
obj.setType(type);
onEndWholeObject(obj);
if (data != null)
onInflatedObjectData(obj, type, data);
@@ -1069,8 +1073,21 @@ public abstract class PackParser {
}
}

private void verifySafeObject(final AnyObjectId id, final int type,
final byte[] data) throws IOException {
/**
* Verify the integrity of the object.
*
* @param id
* identity of the object to be checked.
* @param type
* the type of the object.
* @param data
* raw content of the object.
* @throws CorruptObjectException
* if the object checker rejects the object.
* @since 4.9
*
*/
protected void verifySafeObject(final AnyObjectId id, final int type,
final byte[] data) throws CorruptObjectException {
if (objCheck != null) {
try {
objCheck.check(id, type, data);
@@ -1078,11 +1095,11 @@ public abstract class PackParser {
if (e.getErrorType() != null) {
throw e;
}
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().invalidObject,
Constants.typeString(type),
id.name(),
e.getMessage()), e);
throw new CorruptObjectException(
MessageFormat.format(JGitText.get().invalidObject,
Constants.typeString(type), id.name(),
e.getMessage()),
e);
}
}
}

+ 23
- 0
org.eclipse.jgit/src/org/eclipse/jgit/transport/PackedObjectInfo.java View File

@@ -45,6 +45,7 @@
package org.eclipse.jgit.transport;

import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectIdOwnerMap;

/**
@@ -59,6 +60,8 @@ public class PackedObjectInfo extends ObjectIdOwnerMap.Entry {

private int crc;

private int type = Constants.OBJ_BAD;

PackedObjectInfo(final long headerOffset, final int packedCRC,
final AnyObjectId id) {
super(id);
@@ -112,4 +115,24 @@ public class PackedObjectInfo extends ObjectIdOwnerMap.Entry {
public void setCRC(final int crc) {
this.crc = crc;
}

/**
 * Get the type recorded for this object.
 *
 * @return the object type; {@code Constants.OBJ_BAD} (the initial value)
 *         means the type is unknown or invalid.
 * @since 4.9
 */
public int getType() {
	return this.type;
}

/**
 * Store the type of this object, when it is known.
 *
 * @param type
 *            the object type.
 * @since 4.9
 */
public void setType(final int type) {
	this.type = type;
}
}

Loading…
Cancel
Save