diff options
author | Zhen Chen <czhen@google.com> | 2017-06-20 15:22:09 -0700 |
---|---|---|
committer | Zhen Chen <czhen@google.com> | 2017-07-26 10:12:29 -0700 |
commit | 2c2999643f64c25a1db1d664c5f563c878559ef2 (patch) | |
tree | 232dca08c3f7361de0f78a691b84419d90149bf4 | |
parent | 82f68500c0dfedd98e0769888a46eff7899c5ab7 (diff) | |
download | jgit-2c2999643f64c25a1db1d664c5f563c878559ef2.tar.gz jgit-2c2999643f64c25a1db1d664c5f563c878559ef2.zip |
Add dfs fsck implementation
JGit already had some fsck-like classes like ObjectChecker which can
check for an individual object.
The read-only FsckPackParser which will parse all objects within a pack
file and check it with ObjectChecker. It will also check the pack index
file against the object information from the pack parser.
Change-Id: Ifd8e0d28eb68ff0b8edd2b51b2fa3a50a544c855
Signed-off-by: Zhen Chen <czhen@google.com>
14 files changed, 1010 insertions, 45 deletions
diff --git a/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/JGitTestUtil.java b/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/JGitTestUtil.java index 2962e7192f..5bf61f0e83 100644 --- a/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/JGitTestUtil.java +++ b/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/JGitTestUtil.java @@ -258,4 +258,27 @@ public abstract class JGitTestUtil { target); } + /** + * Concatenate byte arrays. + * + * @param b + * byte arrays to combine together. + * @return a single byte array that contains all bytes copied from input + * byte arrays. + * @since 4.9 + */ + public static byte[] concat(byte[]... b) { + int n = 0; + for (byte[] a : b) { + n += a.length; + } + + byte[] data = new byte[n]; + n = 0; + for (byte[] a : b) { + System.arraycopy(a, 0, data, n, a.length); + n += a.length; + } + return data; + } } diff --git a/org.eclipse.jgit.test/META-INF/MANIFEST.MF b/org.eclipse.jgit.test/META-INF/MANIFEST.MF index 0400f1ebff..670b42bd86 100644 --- a/org.eclipse.jgit.test/META-INF/MANIFEST.MF +++ b/org.eclipse.jgit.test/META-INF/MANIFEST.MF @@ -23,6 +23,7 @@ Import-Package: com.googlecode.javaewah;version="[1.1.6,2.0.0)", org.eclipse.jgit.ignore;version="[4.9.0,4.10.0)", org.eclipse.jgit.ignore.internal;version="[4.9.0,4.10.0)", org.eclipse.jgit.internal;version="[4.9.0,4.10.0)", + org.eclipse.jgit.internal.fsck;version="[4.9.0,4.10.0)", org.eclipse.jgit.internal.storage.dfs;version="[4.9.0,4.10.0)", org.eclipse.jgit.internal.storage.file;version="[4.9.0,4.10.0)", org.eclipse.jgit.internal.storage.pack;version="[4.9.0,4.10.0)", diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsFsckTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsFsckTest.java new file mode 100644 index 0000000000..3fa3952454 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsFsckTest.java @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.dfs; + +import static org.eclipse.jgit.junit.JGitTestUtil.concat; +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; +import static org.eclipse.jgit.lib.Constants.encodeASCII; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; + +import org.eclipse.jgit.internal.fsck.FsckError; +import org.eclipse.jgit.internal.fsck.FsckError.CorruptObject; +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.ObjectChecker.ErrorType; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectInserter; +import org.eclipse.jgit.revwalk.RevCommit; +import org.junit.Before; +import org.junit.Test; + +public class DfsFsckTest { + private TestRepository<InMemoryRepository> git; + + private InMemoryRepository repo; + + private ObjectInserter ins; + + @Before + public void setUp() throws IOException { + DfsRepositoryDescription desc = new DfsRepositoryDescription("test"); + git = new TestRepository<>(new InMemoryRepository(desc)); + repo = git.getRepository(); + ins = repo.newObjectInserter(); + } + + @Test + public void testHealthyRepo() throws Exception { + RevCommit commit0 = git.commit().message("0").create(); + RevCommit commit1 = git.commit().message("1").parent(commit0).create(); + git.update("master", commit1); + + DfsFsck fsck = new DfsFsck(repo); + FsckError errors = fsck.check(null); + + assertEquals(errors.getCorruptObjects().size(), 0); + assertEquals(errors.getMissingObjects().size(), 0); + assertEquals(errors.getCorruptIndices().size(), 0); + } + + @Test + public void testCommitWithCorruptAuthor() throws Exception { + StringBuilder b = new StringBuilder(); + b.append("tree be9bfa841874ccc9f2ef7c48d0c76226f89b7189\n"); + b.append("author b <b@c> <b@c> 0 +0000\n"); + b.append("committer <> 0 +0000\n"); + byte[] data = encodeASCII(b.toString()); + ObjectId id = ins.insert(Constants.OBJ_COMMIT, data); + ins.flush(); + + DfsFsck fsck = new DfsFsck(repo); + FsckError errors = fsck.check(null); + + assertEquals(errors.getCorruptObjects().size(), 1); + CorruptObject o = errors.getCorruptObjects().iterator().next(); + assertTrue(o.getId().equals(id)); + assertEquals(o.getErrorType(), ErrorType.BAD_DATE); + } + + @Test + public void testCommitWithoutTree() throws Exception { + StringBuilder b = new StringBuilder(); + b.append("parent "); + b.append("be9bfa841874ccc9f2ef7c48d0c76226f89b7189"); + b.append('\n'); + byte[] data = encodeASCII(b.toString()); + ObjectId id = ins.insert(Constants.OBJ_COMMIT, data); + ins.flush(); + + DfsFsck fsck = new DfsFsck(repo); + FsckError errors = fsck.check(null); + + assertEquals(errors.getCorruptObjects().size(), 1); + CorruptObject o = errors.getCorruptObjects().iterator().next(); + assertTrue(o.getId().equals(id)); + assertEquals(o.getErrorType(), ErrorType.MISSING_TREE); + } + + @Test + public void testTagWithoutObject() throws Exception { + StringBuilder b = new StringBuilder(); + b.append("type commit\n"); + b.append("tag test-tag\n"); + b.append("tagger A. U. Thor <author@localhost> 1 +0000\n"); + byte[] data = encodeASCII(b.toString()); + ObjectId id = ins.insert(Constants.OBJ_TAG, data); + ins.flush(); + + DfsFsck fsck = new DfsFsck(repo); + FsckError errors = fsck.check(null); + + assertEquals(errors.getCorruptObjects().size(), 1); + CorruptObject o = errors.getCorruptObjects().iterator().next(); + assertTrue(o.getId().equals(id)); + assertEquals(o.getErrorType(), ErrorType.MISSING_OBJECT); + } + + @Test + public void testTreeWithNullSha() throws Exception { + byte[] data = concat(encodeASCII("100644 A"), new byte[] { '\0' }, + new byte[OBJECT_ID_LENGTH]); + ObjectId id = ins.insert(Constants.OBJ_TREE, data); + ins.flush(); + + DfsFsck fsck = new DfsFsck(repo); + FsckError errors = fsck.check(null); + + assertEquals(errors.getCorruptObjects().size(), 1); + CorruptObject o = errors.getCorruptObjects().iterator().next(); + assertTrue(o.getId().equals(id)); + assertEquals(o.getErrorType(), ErrorType.NULL_SHA1); + } + + @Test + public void testMultipleInvalidObjects() throws Exception { + StringBuilder b = new StringBuilder(); + b.append("tree "); + b.append("be9bfa841874ccc9f2ef7c48d0c76226f89b7189"); + b.append('\n'); + b.append("parent "); + b.append("\n"); + byte[] data = encodeASCII(b.toString()); + ObjectId id1 = ins.insert(Constants.OBJ_COMMIT, data); + + b = new StringBuilder(); + b.append("100644"); + data = encodeASCII(b.toString()); + ObjectId id2 = ins.insert(Constants.OBJ_TREE, data); + + ins.flush(); + + DfsFsck fsck = new DfsFsck(repo); + FsckError errors = fsck.check(null); + + assertEquals(errors.getCorruptObjects().size(), 2); + for (CorruptObject o : errors.getCorruptObjects()) { + if (o.getId().equals(id1)) { + assertEquals(o.getErrorType(), ErrorType.BAD_PARENT_SHA1); + } else if (o.getId().equals(id2)) { + assertNull(o.getErrorType()); + } else { + fail(); + } + } + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectCheckerTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectCheckerTest.java index 43160fb115..c8729d9443 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectCheckerTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectCheckerTest.java @@ -45,6 +45,7 @@ package org.eclipse.jgit.lib; import static java.lang.Integer.valueOf; +import static org.eclipse.jgit.junit.JGitTestUtil.concat; import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; import static org.eclipse.jgit.lib.Constants.OBJ_BAD; import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; @@ -1054,20 +1055,7 @@ public class ObjectCheckerTest { checker.checkTree(data); } - private static byte[] concat(byte[]... b) { - int n = 0; - for (byte[] a : b) { - n += a.length; - } - byte[] data = new byte[n]; - n = 0; - for (byte[] a : b) { - System.arraycopy(a, 0, data, n, a.length); - n += a.length; - } - return data; - } @Test public void testInvalidTreeNameIsMacHFSGitCorruptUTF8AtEnd() diff --git a/org.eclipse.jgit/META-INF/MANIFEST.MF b/org.eclipse.jgit/META-INF/MANIFEST.MF index 478071f19b..4719ceb0e5 100644 --- a/org.eclipse.jgit/META-INF/MANIFEST.MF +++ b/org.eclipse.jgit/META-INF/MANIFEST.MF @@ -59,6 +59,7 @@ Export-Package: org.eclipse.jgit.annotations;version="4.9.0", org.eclipse.jgit.ignore;version="4.9.0", org.eclipse.jgit.ignore.internal;version="4.9.0";x-friends:="org.eclipse.jgit.test", org.eclipse.jgit.internal;version="4.9.0";x-friends:="org.eclipse.jgit.test,org.eclipse.jgit.http.test", + org.eclipse.jgit.internal.fsck;version="4.9.0";x-friends:="org.eclipse.jgit.test", org.eclipse.jgit.internal.ketch;version="4.9.0";x-friends:="org.eclipse.jgit.junit,org.eclipse.jgit.test,org.eclipse.jgit.pgm", org.eclipse.jgit.internal.storage.dfs;version="4.9.0"; x-friends:="org.eclipse.jgit.test, diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties index 6e793dab75..fc29f481d5 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties @@ -409,8 +409,11 @@ mergeRecursiveReturnedNoCommit=Merge returned no commit:\n Depth {0}\n Head one mergeRecursiveTooManyMergeBasesFor = "More than {0} merge bases for:\n a {1}\n b {2} found:\n count {3}" messageAndTaggerNotAllowedInUnannotatedTags = Unannotated tags cannot have a message or tagger minutesAgo={0} minutes ago +mismatchOffset=mismatch offset for object {0} +mismatchCRC=mismatch CRC for object {0} missingAccesskey=Missing accesskey. missingConfigurationForKey=No value for key {0} found in configuration +missingCRC=missing CRC for object {0} missingDeltaBase=delta base missingForwardImageInGITBinaryPatch=Missing forward-image in GIT binary patch missingObject=Missing {0} {1} @@ -667,6 +670,7 @@ unknownDIRCVersion=Unknown DIRC version {0} unknownHost=unknown host unknownIndexVersionOrCorruptIndex=Unknown index version (or corrupt index): {0} unknownObject=unknown object +unknownObjectInIndex=unknown object {0} found in index but not in pack file unknownObjectType=Unknown object type {0}. unknownObjectType2=unknown unknownRepositoryFormat=Unknown repository format diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/errors/CorruptPackIndexException.java b/org.eclipse.jgit/src/org/eclipse/jgit/errors/CorruptPackIndexException.java new file mode 100644 index 0000000000..65d83b3513 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/errors/CorruptPackIndexException.java @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.errors; + +import org.eclipse.jgit.annotations.Nullable; + +/** + * Exception thrown when encounters a corrupt pack index file. + * + * @since 4.9 + */ +public class CorruptPackIndexException extends Exception { + private static final long serialVersionUID = 1L; + + /** The error type of a corrupt index file. */ + public enum ErrorType { + /** Offset does not match index in pack file. */ + MISMATCH_OFFSET, + /** CRC does not match CRC of the object data in pack file. */ + MISMATCH_CRC, + /** CRC is not present in index file. */ + MISSING_CRC, + /** Object in pack is not present in index file. */ + MISSING_OBJ, + /** Object in index file is not present in pack file. */ + UNKNOWN_OBJ, + } + + private ErrorType errorType; + + /** + * Report a specific error condition discovered in an index file. + * + * @param message + * the error message. + * @param errorType + * the error type of corruption. + */ + public CorruptPackIndexException(String message, ErrorType errorType) { + super(message); + this.errorType = errorType; + } + + /** + * Specific the reason of the corrupt index file. + * + * @return error condition or null. + */ + @Nullable + public ErrorType getErrorType() { + return errorType; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java index ea752b950d..a68f839c59 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java @@ -468,8 +468,11 @@ public class JGitText extends TranslationBundle { /***/ public String mergeRecursiveTooManyMergeBasesFor; /***/ public String messageAndTaggerNotAllowedInUnannotatedTags; /***/ public String minutesAgo; + /***/ public String mismatchOffset; + /***/ public String mismatchCRC; /***/ public String missingAccesskey; /***/ public String missingConfigurationForKey; + /***/ public String missingCRC; /***/ public String missingDeltaBase; /***/ public String missingForwardImageInGITBinaryPatch; /***/ public String missingObject; @@ -726,6 +729,7 @@ public class JGitText extends TranslationBundle { /***/ public String unknownHost; /***/ public String unknownIndexVersionOrCorruptIndex; /***/ public String unknownObject; + /***/ public String unknownObjectInIndex; /***/ public String unknownObjectType; /***/ public String unknownObjectType2; /***/ public String unknownRepositoryFormat; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckError.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckError.java new file mode 100644 index 0000000000..b7aac1630b --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckError.java @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.eclipse.jgit.internal.fsck; + +import java.util.HashSet; +import java.util.Set; + +import org.eclipse.jgit.annotations.Nullable; +import org.eclipse.jgit.errors.CorruptPackIndexException; +import org.eclipse.jgit.errors.CorruptPackIndexException.ErrorType; +import org.eclipse.jgit.lib.ObjectChecker; +import org.eclipse.jgit.lib.ObjectId; + +/** Holds all fsck errors of a git repository. */ +public class FsckError { + /** Represents a corrupt object. */ + public static class CorruptObject { + final ObjectId id; + + final int type; + + ObjectChecker.ErrorType errorType; + + /** + * @param id + * the object identifier. + * @param type + * type of the object. + */ + public CorruptObject(ObjectId id, int type) { + this.id = id; + this.type = type; + } + + void setErrorType(ObjectChecker.ErrorType errorType) { + this.errorType = errorType; + } + + /** @return identifier of the object. */ + public ObjectId getId() { + return id; + } + + /** @return type of the object. */ + public int getType() { + return type; + } + + /** @return error type of the corruption. */ + @Nullable + public ObjectChecker.ErrorType getErrorType() { + return errorType; + } + } + + /** Represents a corrupt pack index file. */ + public static class CorruptIndex { + String fileName; + + CorruptPackIndexException.ErrorType errorType; + + /** + * @param fileName + * the file name of the pack index. + * @param errorType + * the type of error as reported in + * {@link CorruptPackIndexException}. + */ + public CorruptIndex(String fileName, ErrorType errorType) { + this.fileName = fileName; + this.errorType = errorType; + } + + /** @return the file name of the index file. */ + public String getFileName() { + return fileName; + } + + /** @return the error type of the corruption. */ + public ErrorType getErrorType() { + return errorType; + } + } + + private final Set<CorruptObject> corruptObjects = new HashSet<>(); + + private final Set<ObjectId> missingObjects = new HashSet<>(); + + private final Set<CorruptIndex> corruptIndices = new HashSet<>(); + + /** @return corrupt objects from all pack files. */ + public Set<CorruptObject> getCorruptObjects() { + return corruptObjects; + } + + /** @return missing objects that should present in pack files. */ + public Set<ObjectId> getMissingObjects() { + return missingObjects; + } + + /** @return corrupt index files associated with the packs. */ + public Set<CorruptIndex> getCorruptIndices() { + return corruptIndices; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java new file mode 100644 index 0000000000..e6ec6814b3 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java @@ -0,0 +1,326 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.fsck; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.text.MessageFormat; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.zip.CRC32; + +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.CorruptPackIndexException; +import org.eclipse.jgit.errors.CorruptPackIndexException.ErrorType; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.fsck.FsckError.CorruptObject; +import org.eclipse.jgit.internal.storage.dfs.ReadableChannel; +import org.eclipse.jgit.internal.storage.file.PackIndex; +import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.ObjectChecker; +import org.eclipse.jgit.lib.ObjectDatabase; +import org.eclipse.jgit.transport.PackParser; +import org.eclipse.jgit.transport.PackedObjectInfo; + +/** A read-only pack parser for object validity checking. */ +public class FsckPackParser extends PackParser { + private final CRC32 crc; + + private final ReadableChannel channel; + + private final Set<CorruptObject> corruptObjects = new HashSet<>(); + + private long expectedObjectCount = -1L; + + private long offset; + + private int blockSize; + + /** + * @param db + * the object database which stores repository's data. + * @param channel + * readable channel of the pack file. + */ + public FsckPackParser(ObjectDatabase db, ReadableChannel channel) { + super(db, Channels.newInputStream(channel)); + this.channel = channel; + setCheckObjectCollisions(false); + this.crc = new CRC32(); + this.blockSize = channel.blockSize() > 0 ? channel.blockSize() : 65536; + } + + @Override + protected void onPackHeader(long objCnt) throws IOException { + if (expectedObjectCount >= 0) { + // Some DFS pack files don't contain the correct object count, e.g. + // INSERT/RECEIVE packs don't always contain the correct object + // count in their headers. Overwrite the expected object count + // after parsing the pack header. + setExpectedObjectCount(expectedObjectCount); + } + } + + @Override + protected void onBeginWholeObject(long streamPosition, int type, + long inflatedSize) throws IOException { + crc.reset(); + } + + @Override + protected void onObjectHeader(Source src, byte[] raw, int pos, int len) + throws IOException { + crc.update(raw, pos, len); + } + + @Override + protected void onObjectData(Source src, byte[] raw, int pos, int len) + throws IOException { + crc.update(raw, pos, len); + } + + @Override + protected void onEndWholeObject(PackedObjectInfo info) throws IOException { + info.setCRC((int) crc.getValue()); + } + + @Override + protected void onBeginOfsDelta(long deltaStreamPosition, + long baseStreamPosition, long inflatedSize) throws IOException { + crc.reset(); + } + + @Override + protected void onBeginRefDelta(long deltaStreamPosition, AnyObjectId baseId, + long inflatedSize) throws IOException { + crc.reset(); + } + + @Override + protected UnresolvedDelta onEndDelta() throws IOException { + UnresolvedDelta delta = new UnresolvedDelta(); + delta.setCRC((int) crc.getValue()); + return delta; + } + + @Override + protected void onInflatedObjectData(PackedObjectInfo obj, int typeCode, + byte[] data) throws IOException { + // FsckPackParser ignores this event. + } + + @Override + protected void verifySafeObject(final AnyObjectId id, final int type, + final byte[] data) { + try { + super.verifySafeObject(id, type, data); + } catch (CorruptObjectException e) { + // catch the exception and continue parse the pack file + CorruptObject o = new CorruptObject(id.toObjectId(), type); + if (e.getErrorType() != null) { + o.setErrorType(e.getErrorType()); + } + corruptObjects.add(o); + } + } + + @Override + protected void onPackFooter(byte[] hash) throws IOException { + } + + @Override + protected boolean onAppendBase(int typeCode, byte[] data, + PackedObjectInfo info) throws IOException { + // Do nothing. + return false; + } + + @Override + protected void onEndThinPack() throws IOException { + } + + @Override + protected ObjectTypeAndSize seekDatabase(PackedObjectInfo obj, + ObjectTypeAndSize info) throws IOException { + crc.reset(); + offset = obj.getOffset(); + return readObjectHeader(info); + } + + @Override + protected ObjectTypeAndSize seekDatabase(UnresolvedDelta delta, + ObjectTypeAndSize info) throws IOException { + crc.reset(); + offset = delta.getOffset(); + return readObjectHeader(info); + } + + @Override + protected int readDatabase(byte[] dst, int pos, int cnt) + throws IOException { + // read from input instead of database. + int n = read(offset, dst, pos, cnt); + if (n > 0) { + offset += n; + } + return n; + } + + int read(long channelPosition, byte[] dst, int pos, int cnt) + throws IOException { + long block = channelPosition / blockSize; + byte[] bytes = readFromChannel(block); + if (bytes == null) { + return -1; + } + int offset = (int) (channelPosition - block * blockSize); + int bytesToCopy = Math.min(cnt, bytes.length - offset); + if (bytesToCopy < 1) { + return -1; + } + System.arraycopy(bytes, offset, dst, pos, bytesToCopy); + return bytesToCopy; + } + + private byte[] readFromChannel(long block) throws IOException { + channel.position(block * blockSize); + ByteBuffer buf = ByteBuffer.allocate(blockSize); + int totalBytesRead = 0; + while (totalBytesRead < blockSize) { + int bytesRead = channel.read(buf); + if (bytesRead == -1) { + if (totalBytesRead == 0) { + return null; + } + return Arrays.copyOf(buf.array(), totalBytesRead); + } + totalBytesRead += bytesRead; + } + return buf.array(); + } + + @Override + protected boolean checkCRC(int oldCRC) { + return oldCRC == (int) crc.getValue(); + } + + @Override + protected void onStoreStream(byte[] raw, int pos, int len) + throws IOException { + } + + /** + * @return corrupt objects that reported by {@link ObjectChecker}. + */ + public Set<CorruptObject> getCorruptObjects() { + return corruptObjects; + } + + /** + * Verify the existing index file with all objects from the pack. + * + * @param entries + * all the entries that are expected in the index file + * @param idx + * index file associate with the pack + * @throws CorruptPackIndexException + * when the index file is corrupt. + */ + public void verifyIndex(List<PackedObjectInfo> entries, PackIndex idx) + throws CorruptPackIndexException { + Set<String> all = new HashSet<>(); + for (PackedObjectInfo entry : entries) { + all.add(entry.getName()); + long offset = idx.findOffset(entry); + if (offset == -1) { + throw new CorruptPackIndexException( + MessageFormat.format(JGitText.get().missingObject, + entry.getType(), entry.getName()), + ErrorType.MISSING_OBJ); + } else if (offset != entry.getOffset()) { + throw new CorruptPackIndexException(MessageFormat + .format(JGitText.get().mismatchOffset, entry.getName()), + ErrorType.MISMATCH_OFFSET); + } + + try { + if (idx.hasCRC32Support() + && (int) idx.findCRC32(entry) != entry.getCRC()) { + throw new CorruptPackIndexException( + MessageFormat.format(JGitText.get().mismatchCRC, + entry.getName()), + ErrorType.MISMATCH_CRC); + } + } catch (MissingObjectException e) { + throw new CorruptPackIndexException(MessageFormat + .format(JGitText.get().missingCRC, entry.getName()), + ErrorType.MISSING_CRC); + } + } + + for (MutableEntry entry : idx) { + if (!all.contains(entry.name())) { + throw new CorruptPackIndexException(MessageFormat.format( + JGitText.get().unknownObjectInIndex, entry.name()), + ErrorType.UNKNOWN_OBJ); + } + } + } + + /** + * Set the object count for overwriting the expected object count from pack + * header. + * + * @param expectedObjectCount + * the actual expected object count. + */ + public void overwriteObjectCount(long expectedObjectCount) { + this.expectedObjectCount = expectedObjectCount; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/package-info.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/package-info.java new file mode 100644 index 0000000000..361b61fb0e --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/package-info.java @@ -0,0 +1,4 @@ +/** + * Git fsck support. + */ +package org.eclipse.jgit.internal.fsck; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java new file mode 100644 index 0000000000..1cc475a9fe --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.dfs; + +import java.io.IOException; +import java.util.List; + +import org.eclipse.jgit.errors.CorruptPackIndexException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.internal.fsck.FsckError; +import org.eclipse.jgit.internal.fsck.FsckError.CorruptIndex; +import org.eclipse.jgit.internal.fsck.FsckPackParser; +import org.eclipse.jgit.internal.storage.pack.PackExt; +import org.eclipse.jgit.lib.ObjectChecker; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.transport.PackedObjectInfo; + +/** Verify the validity and connectivity of a DFS repository. */ +public class DfsFsck { + private final DfsRepository repo; + + private final DfsObjDatabase objdb; + + private final DfsReader ctx; + + private ObjectChecker objChecker = new ObjectChecker(); + + /** + * Initialize DFS fsck. + * + * @param repository + * the dfs repository to check. + */ + public DfsFsck(DfsRepository repository) { + repo = repository; + objdb = repo.getObjectDatabase(); + ctx = objdb.newReader(); + } + + + /** + * Verify the integrity and connectivity of all objects in the object + * database. + * + * @param pm + * callback to provide progress feedback during the check. + * @return all errors about the repository. + * @throws IOException + * if encounters IO errors during the process. + */ + public FsckError check(ProgressMonitor pm) throws IOException { + FsckError errors = new FsckError(); + try { + for (DfsPackFile pack : objdb.getPacks()) { + DfsPackDescription packDesc = pack.getPackDescription(); + try (ReadableChannel channel = repo.getObjectDatabase() + .openFile(packDesc, PackExt.PACK)) { + List<PackedObjectInfo> objectsInPack; + FsckPackParser parser = new FsckPackParser( + repo.getObjectDatabase(), channel); + parser.setObjectChecker(objChecker); + parser.overwriteObjectCount(packDesc.getObjectCount()); + parser.parse(pm); + errors.getCorruptObjects() + .addAll(parser.getCorruptObjects()); + objectsInPack = parser.getSortedObjectList(null); + parser.verifyIndex(objectsInPack, pack.getPackIndex(ctx)); + } catch (MissingObjectException e) { + errors.getMissingObjects().add(e.getObjectId()); + } catch (CorruptPackIndexException e) { + errors.getCorruptIndices().add(new CorruptIndex( + pack.getPackDescription() + .getFileName(PackExt.INDEX), + e.getErrorType())); + } + } + } finally { + ctx.close(); + } + return errors; + } + + /** + * Use a customized object checker instead of the default one. Caller can + * specify a skip list to ignore some errors. + * + * @param objChecker + * A customized object checker. + */ + public void setObjectChecker(ObjectChecker objChecker) { + this.objChecker = objChecker; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java index 19dfa3465d..db3578bdb4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java @@ -550,29 +550,7 @@ public abstract class PackParser { } if (deltaCount > 0) { - if (resolving instanceof BatchingProgressMonitor) { - ((BatchingProgressMonitor) resolving).setDelayStart( - 1000, - TimeUnit.MILLISECONDS); - } - resolving.beginTask(JGitText.get().resolvingDeltas, deltaCount); - resolveDeltas(resolving); - if (entryCount < expectedObjectCount) { - if (!isAllowThin()) { - throw new IOException(MessageFormat.format( - JGitText.get().packHasUnresolvedDeltas, - Long.valueOf(expectedObjectCount - entryCount))); - } - - resolveDeltasWithExternalBases(resolving); - - if (entryCount < expectedObjectCount) { - throw new IOException(MessageFormat.format( - JGitText.get().packHasUnresolvedDeltas, - Long.valueOf(expectedObjectCount - entryCount))); - } - } - resolving.endTask(); + processDeltas(resolving); } packDigest = null; @@ -595,6 +573,31 @@ public abstract class PackParser { return null; // By default there is no locking. } + private void processDeltas(ProgressMonitor resolving) throws IOException { + if (resolving instanceof BatchingProgressMonitor) { + ((BatchingProgressMonitor) resolving).setDelayStart(1000, + TimeUnit.MILLISECONDS); + } + resolving.beginTask(JGitText.get().resolvingDeltas, deltaCount); + resolveDeltas(resolving); + if (entryCount < expectedObjectCount) { + if (!isAllowThin()) { + throw new IOException(MessageFormat.format( + JGitText.get().packHasUnresolvedDeltas, + Long.valueOf(expectedObjectCount - entryCount))); + } + + resolveDeltasWithExternalBases(resolving); + + if (entryCount < expectedObjectCount) { + throw new IOException(MessageFormat.format( + JGitText.get().packHasUnresolvedDeltas, + Long.valueOf(expectedObjectCount - entryCount))); + } + } + resolving.endTask(); + } + private void resolveDeltas(final ProgressMonitor progress) throws IOException { final int last = entryCount; @@ -684,6 +687,7 @@ public abstract class PackParser { PackedObjectInfo oe; oe = newInfo(tempObjectId, visit.delta, visit.parent.id); oe.setOffset(visit.delta.position); + oe.setType(type); onInflatedObjectData(oe, type, visit.data); addObjectAndTrack(oe); visit.id = oe; @@ -854,10 +858,9 @@ public abstract class PackParser { visit.id = baseId; final int typeCode = ldr.getType(); final PackedObjectInfo oe = newInfo(baseId, null, null); - + oe.setType(typeCode); if (onAppendBase(typeCode, visit.data, oe)) entries[entryCount++] = oe; - visit.nextChild = firstChildOf(oe); resolveDeltas(visit.next(), typeCode, new ObjectTypeAndSize(), progress); @@ -1059,6 +1062,7 @@ public abstract class PackParser { PackedObjectInfo obj = newInfo(tempObjectId, null, null); obj.setOffset(pos); + obj.setType(type); onEndWholeObject(obj); if (data != null) onInflatedObjectData(obj, type, data); @@ -1069,8 +1073,21 @@ public abstract class PackParser { } } - private void verifySafeObject(final AnyObjectId id, final int type, - final byte[] data) throws IOException { + /** + * Verify the integrity of the object. + * + * @param id + * identity of the object to be checked. + * @param type + * the type of the object. + * @param data + * raw content of the object. + * @throws CorruptObjectException + * @since 4.9 + * + */ + protected void verifySafeObject(final AnyObjectId id, final int type, + final byte[] data) throws CorruptObjectException { if (objCheck != null) { try { objCheck.check(id, type, data); @@ -1078,11 +1095,11 @@ public abstract class PackParser { if (e.getErrorType() != null) { throw e; } - throw new CorruptObjectException(MessageFormat.format( - JGitText.get().invalidObject, - Constants.typeString(type), - id.name(), - e.getMessage()), e); + throw new CorruptObjectException( + MessageFormat.format(JGitText.get().invalidObject, + Constants.typeString(type), id.name(), + e.getMessage()), + e); } } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackedObjectInfo.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackedObjectInfo.java index 6da1c578c2..381c22893b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackedObjectInfo.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackedObjectInfo.java @@ -45,6 +45,7 @@ package org.eclipse.jgit.transport; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectIdOwnerMap; /** @@ -59,6 +60,8 @@ public class PackedObjectInfo extends ObjectIdOwnerMap.Entry { private int crc; + private int type = Constants.OBJ_BAD; + PackedObjectInfo(final long headerOffset, final int packedCRC, final AnyObjectId id) { super(id); @@ -112,4 +115,24 @@ public class PackedObjectInfo extends ObjectIdOwnerMap.Entry { public void setCRC(final int crc) { this.crc = crc; } + + /** + * @return the object type. The default type is OBJ_BAD, which is considered + * as unknown or invalid type. + * @since 4.9 + */ + public int getType() { + return type; + } + + /** + * Record the object type if applicable. + * + * @param type + * the object type. + * @since 4.9 + */ + public void setType(int type) { + this.type = type; + } } |