diff options
author | Shawn Pearce <spearce@spearce.org> | 2015-12-29 15:52:16 -0800 |
---|---|---|
committer | Shawn Pearce <spearce@spearce.org> | 2015-12-30 15:18:48 -0800 |
commit | fa7ce0e0f3a8973667b0d51966fc9bcb4fdbe505 (patch) | |
tree | 5611e5e2face4c89c68947f76fcc824664d876e5 /org.eclipse.jgit | |
parent | e3acf017486204fb56c33c5edd51d5f2409be7ee (diff) | |
download | jgit-fa7ce0e0f3a8973667b0d51966fc9bcb4fdbe505.tar.gz jgit-fa7ce0e0f3a8973667b0d51966fc9bcb4fdbe505.zip |
ObjectChecker: allow some objects to skip errors
Some ancient objects may be broken, but in a relatively harmless way.
Allow the ObjectChecker caller to whitelist specific objects that are
going to fail checks, but that a human has reviewed and judged to be
OK enough to permit their continued use.
This avoids needing to rewrite history to scrub the broken objects out.
Honor the git-core fsck.skipList configuration setting when receiving a
push or fetching from a remote repository.
Change-Id: I62bd7c0b0848981f73dd7c752860fd02794233a6
Diffstat (limited to 'org.eclipse.jgit')
5 files changed, 291 insertions, 55 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/LazyObjectIdSetFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/LazyObjectIdSetFile.java new file mode 100644 index 0000000000..1e2617c0e3 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/LazyObjectIdSetFile.java @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2015, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.file; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; + +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.MutableObjectId; +import org.eclipse.jgit.lib.ObjectIdOwnerMap; +import org.eclipse.jgit.lib.ObjectIdSet; + +/** Lazily loads a set of ObjectIds, one per line. */ +public class LazyObjectIdSetFile implements ObjectIdSet { + private final File src; + private ObjectIdOwnerMap<Entry> set; + + /** + * Create a new lazy set from a file. + * + * @param src + * the source file. 
+ */ + public LazyObjectIdSetFile(File src) { + this.src = src; + } + + @Override + public boolean contains(AnyObjectId objectId) { + if (set == null) { + set = load(); + } + return set.contains(objectId); + } + + private ObjectIdOwnerMap<Entry> load() { + ObjectIdOwnerMap<Entry> r = new ObjectIdOwnerMap<>(); + try (FileInputStream fin = new FileInputStream(src); + Reader rin = new InputStreamReader(fin, UTF_8); + BufferedReader br = new BufferedReader(rin)) { + MutableObjectId id = new MutableObjectId(); + for (String line; (line = br.readLine()) != null;) { + id.fromString(line); + if (!r.contains(id)) { + r.add(new Entry(id)); + } + } + } catch (IOException e) { + // Ignore IO errors accessing the lazy set. + } + return r; + } + + static class Entry extends ObjectIdOwnerMap.Entry { + Entry(AnyObjectId id) { + super(id); + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java index 855d9d7509..89a526911a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java @@ -44,6 +44,10 @@ package org.eclipse.jgit.lib; +import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; +import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT; +import static org.eclipse.jgit.lib.Constants.OBJ_TAG; +import static org.eclipse.jgit.lib.Constants.OBJ_TREE; import static org.eclipse.jgit.util.RawParseUtils.match; import static org.eclipse.jgit.util.RawParseUtils.nextLF; import static org.eclipse.jgit.util.RawParseUtils.parseBase10; @@ -54,6 +58,7 @@ import java.util.HashSet; import java.util.Locale; import java.util.Set; +import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.util.MutableInteger; @@ -99,16 +104,29 @@ public class ObjectChecker { public static final byte[] tagger = 
Constants.encodeASCII("tagger "); //$NON-NLS-1$ private final MutableObjectId tempId = new MutableObjectId(); - private final MutableInteger ptrout = new MutableInteger(); + private ObjectIdSet skipList; private boolean allowZeroMode; - private boolean allowInvalidPersonIdent; private boolean windows; private boolean macosx; /** + * Enable accepting specific malformed (but not horribly broken) objects. + * + * @param objects + * collection of object names known to be broken in a non-fatal + * way that should be ignored by the checker. + * @return {@code this} + * @since 4.2 + */ + public ObjectChecker setSkipList(@Nullable ObjectIdSet objects) { + skipList = objects; + return this; + } + + /** * Enable accepting leading zero mode in tree entries. * <p> * Some broken Git libraries generated leading zeros in the mode part of @@ -183,19 +201,40 @@ public class ObjectChecker { * @throws CorruptObjectException * if an error is identified. */ - public void check(final int objType, final byte[] raw) + public void check(int objType, byte[] raw) + throws CorruptObjectException { + check(idFor(objType, raw), objType, raw); + } + + /** + * Check an object for parsing errors. + * + * @param id + * identify of the object being checked. + * @param objType + * type of the object. Must be a valid object type code in + * {@link Constants}. + * @param raw + * the raw data which comprises the object. This should be in the + * canonical format (that is the format used to generate the + * ObjectId of the object). The array is never modified. + * @throws CorruptObjectException + * if an error is identified. 
+ * @since 4.2 + */ + public void check(@Nullable AnyObjectId id, int objType, byte[] raw) throws CorruptObjectException { switch (objType) { - case Constants.OBJ_COMMIT: - checkCommit(raw); + case OBJ_COMMIT: + checkCommit(id, raw); break; - case Constants.OBJ_TAG: - checkTag(raw); + case OBJ_TAG: + checkTag(id, raw); break; - case Constants.OBJ_TREE: - checkTree(raw); + case OBJ_TREE: + checkTree(id, raw); break; - case Constants.OBJ_BLOB: + case OBJ_BLOB: checkBlob(raw); break; default: @@ -214,9 +253,9 @@ public class ObjectChecker { } } - private int personIdent(final byte[] raw, int ptr) { - if (allowInvalidPersonIdent) - return nextLF(raw, ptr) - 1; + private int personIdent(byte[] raw, int ptr, @Nullable AnyObjectId id) { + if (allowInvalidPersonIdent || skip(id)) + return nextLF(raw, ptr); final int emailB = nextLF(raw, ptr, '<'); if (emailB == ptr || raw[emailB - 1] != '<') @@ -238,18 +277,38 @@ public class ObjectChecker { parseBase10(raw, ptr + 1, ptrout); // tz offset if (ptr + 1 == ptrout.value) return -1; - return ptrout.value; + + ptr = ptrout.value; + if (raw[ptr++] == '\n') + return ptr; + return -1; + } + + /** + * Check a commit for errors. + * + * @param raw + * the commit data. The array is never modified. + * @throws CorruptObjectException + * if any error was detected. + */ + public void checkCommit(byte[] raw) throws CorruptObjectException { + checkCommit(idFor(OBJ_COMMIT, raw), raw); } /** * Check a commit for errors. * + * @param id + * identity of the object being checked. * @param raw * the commit data. The array is never modified. * @throws CorruptObjectException * if any error was detected. 
+ * @since 4.2 */ - public void checkCommit(final byte[] raw) throws CorruptObjectException { + public void checkCommit(@Nullable AnyObjectId id, byte[] raw) + throws CorruptObjectException { int ptr = 0; if ((ptr = match(raw, ptr, tree)) < 0) @@ -266,30 +325,54 @@ public class ObjectChecker { JGitText.get().corruptObjectInvalidParent); } - if ((ptr = match(raw, ptr, author)) < 0) + int p = match(raw, ptr, author); + if (p > ptr) { + if ((ptr = personIdent(raw, p, id)) < 0) { + throw new CorruptObjectException( + JGitText.get().corruptObjectInvalidAuthor); + } + } else if (!skip(id)) { throw new CorruptObjectException( JGitText.get().corruptObjectNoAuthor); - if ((ptr = personIdent(raw, ptr)) < 0 || raw[ptr++] != '\n') - throw new CorruptObjectException( - JGitText.get().corruptObjectInvalidAuthor); + } - if ((ptr = match(raw, ptr, committer)) < 0) + p = match(raw, ptr, committer); + if (p > ptr) { + if ((ptr = personIdent(raw, p, id)) < 0) { + throw new CorruptObjectException( + JGitText.get().corruptObjectInvalidCommitter); + } + } else if (!skip(id)) { throw new CorruptObjectException( JGitText.get().corruptObjectNoCommitter); - if ((ptr = personIdent(raw, ptr)) < 0 || raw[ptr++] != '\n') - throw new CorruptObjectException( - JGitText.get().corruptObjectInvalidCommitter); + } + } + + /** + * Check an annotated tag for errors. + * + * @param raw + * the tag data. The array is never modified. + * @throws CorruptObjectException + * if any error was detected. + */ + public void checkTag(byte[] raw) throws CorruptObjectException { + checkTag(idFor(OBJ_TAG, raw), raw); } /** * Check an annotated tag for errors. * + * @param id + * identity of the object being checked. * @param raw * the tag data. The array is never modified. * @throws CorruptObjectException * if any error was detected. 
+ * @since 4.2 */ - public void checkTag(final byte[] raw) throws CorruptObjectException { + public void checkTag(@Nullable AnyObjectId id, byte[] raw) + throws CorruptObjectException { int ptr = 0; if ((ptr = match(raw, ptr, object)) < 0) @@ -304,15 +387,16 @@ public class ObjectChecker { JGitText.get().corruptObjectNoTypeHeader); ptr = nextLF(raw, ptr); - if ((ptr = match(raw, ptr, tag)) < 0) + if (match(raw, ptr, tag) < 0 && !skip(id)) throw new CorruptObjectException( JGitText.get().corruptObjectNoTagHeader); ptr = nextLF(raw, ptr); if ((ptr = match(raw, ptr, tagger)) > 0) { - if ((ptr = personIdent(raw, ptr)) < 0 || raw[ptr++] != '\n') + if ((ptr = personIdent(raw, ptr, id)) < 0) { throw new CorruptObjectException( JGitText.get().corruptObjectInvalidTagger); + } } } @@ -381,11 +465,28 @@ public class ObjectChecker { * @throws CorruptObjectException * if any error was detected. */ - public void checkTree(final byte[] raw) throws CorruptObjectException { + public void checkTree(byte[] raw) throws CorruptObjectException { + checkTree(idFor(OBJ_TREE, raw), raw); + } + + /** + * Check a canonical formatted tree for errors. + * + * @param id + * identity of the object being checked. + * @param raw + * the raw tree data. The array is never modified. + * @throws CorruptObjectException + * if any error was detected. + * @since 4.2 + */ + public void checkTree(@Nullable AnyObjectId id, byte[] raw) + throws CorruptObjectException { final int sz = raw.length; int ptr = 0; int lastNameB = 0, lastNameE = 0, lastMode = 0; - Set<String> normalized = windows || macosx + boolean skip = skip(id); + Set<String> normalized = !skip && (windows || macosx) ? 
new HashSet<String>() : null; @@ -401,7 +502,7 @@ public class ObjectChecker { if (c < '0' || c > '7') throw new CorruptObjectException( JGitText.get().corruptObjectInvalidModeChar); - if (thisMode == 0 && c == '0' && !allowZeroMode) + if (thisMode == 0 && c == '0' && !allowZeroMode && !skip) throw new CorruptObjectException( JGitText.get().corruptObjectInvalidModeStartsZero); thisMode <<= 3; @@ -418,16 +519,16 @@ public class ObjectChecker { if (ptr == sz || raw[ptr] != 0) throw new CorruptObjectException( JGitText.get().corruptObjectTruncatedInName); - checkPathSegment2(raw, thisNameB, ptr); + checkPathSegment2(raw, thisNameB, ptr, skip); if (normalized != null) { if (!normalized.add(normalize(raw, thisNameB, ptr))) throw new CorruptObjectException( JGitText.get().corruptObjectDuplicateEntryNames); - } else if (duplicateName(raw, thisNameB, ptr)) + } else if (!skip && duplicateName(raw, thisNameB, ptr)) throw new CorruptObjectException( JGitText.get().corruptObjectDuplicateEntryNames); - if (lastNameB != 0) { + if (!skip && lastNameB != 0) { final int cmp = pathCompare(raw, lastNameB, lastNameE, lastMode, thisNameB, ptr, thisMode); if (cmp > 0) @@ -468,6 +569,19 @@ public class ObjectChecker { return ptr; } + @SuppressWarnings("resource") + @Nullable + private ObjectId idFor(int objType, byte[] raw) { + if (skipList != null) { + return new ObjectInserter.Formatter().idFor(objType, raw); + } + return null; + } + + private boolean skip(@Nullable AnyObjectId id) { + return skipList != null && id != null && skipList.contains(id); + } + /** * Check tree path entry for validity. 
* <p> @@ -522,10 +636,10 @@ public class ObjectChecker { if (e < end && raw[e] == 0) throw new CorruptObjectException( JGitText.get().corruptObjectNameContainsNullByte); - checkPathSegment2(raw, ptr, end); + checkPathSegment2(raw, ptr, end, false); } - private void checkPathSegment2(byte[] raw, int ptr, int end) + private void checkPathSegment2(byte[] raw, int ptr, int end, boolean skip) throws CorruptObjectException { if (ptr == end) throw new CorruptObjectException( @@ -541,36 +655,38 @@ public class ObjectChecker { JGitText.get().corruptObjectNameDotDot); break; case 4: - if (isGit(raw, ptr + 1)) + if (!skip && isGit(raw, ptr + 1)) throw new CorruptObjectException(String.format( JGitText.get().corruptObjectInvalidName, RawParseUtils.decode(raw, ptr, end))); break; default: - if (end - ptr > 4 && isNormalizedGit(raw, ptr + 1, end)) + if (!skip && end - ptr > 4 + && isNormalizedGit(raw, ptr + 1, end)) throw new CorruptObjectException(String.format( JGitText.get().corruptObjectInvalidName, RawParseUtils.decode(raw, ptr, end))); } - } else if (isGitTilde1(raw, ptr, end)) { + } else if (!skip && isGitTilde1(raw, ptr, end)) { throw new CorruptObjectException(String.format( JGitText.get().corruptObjectInvalidName, RawParseUtils.decode(raw, ptr, end))); } - - if (macosx && isMacHFSGit(raw, ptr, end)) - throw new CorruptObjectException(String.format( - JGitText.get().corruptObjectInvalidNameIgnorableUnicode, - RawParseUtils.decode(raw, ptr, end))); - - if (windows) { - // Windows ignores space and dot at end of file name. 
- if (raw[end - 1] == ' ' || raw[end - 1] == '.') + if (!skip) { + if (macosx && isMacHFSGit(raw, ptr, end)) throw new CorruptObjectException(String.format( - JGitText.get().corruptObjectInvalidNameEnd, - Character.valueOf(((char) raw[end - 1])))); - if (end - ptr >= 3) - checkNotWindowsDevice(raw, ptr, end); + JGitText.get().corruptObjectInvalidNameIgnorableUnicode, + RawParseUtils.decode(raw, ptr, end))); + + if (windows) { + // Windows ignores space and dot at end of file name. + if (raw[end - 1] == ' ' || raw[end - 1] == '.') + throw new CorruptObjectException(String.format( + JGitText.get().corruptObjectInvalidNameEnd, + Character.valueOf(((char) raw[end - 1])))); + if (end - ptr >= 3) + checkNotWindowsDevice(raw, ptr, end); + } } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java index 6e5fc9f009..42816bd689 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java @@ -1049,7 +1049,7 @@ public abstract class PackParser { final byte[] data) throws IOException { if (objCheck != null) { try { - objCheck.check(type, data); + objCheck.check(id, type, data); } catch (CorruptObjectException e) { throw new CorruptObjectException(MessageFormat.format( JGitText.get().invalidObject, diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/TransferConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/TransferConfig.java index f9b74c84e5..2128f1f7e0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/TransferConfig.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/TransferConfig.java @@ -43,13 +43,16 @@ package org.eclipse.jgit.transport; +import java.io.File; import java.util.HashMap; import java.util.Map; import org.eclipse.jgit.annotations.Nullable; +import org.eclipse.jgit.internal.storage.file.LazyObjectIdSetFile; import org.eclipse.jgit.lib.Config; import 
org.eclipse.jgit.lib.Config.SectionParser; import org.eclipse.jgit.lib.ObjectChecker; +import org.eclipse.jgit.lib.ObjectIdSet; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.util.SystemReader; @@ -68,6 +71,7 @@ public class TransferConfig { private final boolean fetchFsck; private final boolean receiveFsck; + private final String fsckSkipList; private final boolean allowLeadingZeroFileMode; private final boolean allowInvalidPersonIdent; private final boolean safeForWindows; @@ -84,6 +88,7 @@ public class TransferConfig { boolean fsck = rc.getBoolean("transfer", "fsckobjects", false); //$NON-NLS-1$ //$NON-NLS-2$ fetchFsck = rc.getBoolean("fetch", "fsckobjects", fsck); //$NON-NLS-1$ //$NON-NLS-2$ receiveFsck = rc.getBoolean("receive", "fsckobjects", fsck); //$NON-NLS-1$ //$NON-NLS-2$ + fsckSkipList = rc.getString("fsck", null, "skipList"); //$NON-NLS-1$ //$NON-NLS-2$ allowLeadingZeroFileMode = rc.getBoolean("fsck", "allowLeadingZeroFileMode", false); //$NON-NLS-1$ //$NON-NLS-2$ allowInvalidPersonIdent = rc.getBoolean("fsck", "allowInvalidPersonIdent", false); //$NON-NLS-1$ //$NON-NLS-2$ safeForWindows = rc.getBoolean("fsck", "safeForWindows", //$NON-NLS-1$ //$NON-NLS-2$ @@ -126,7 +131,15 @@ public class TransferConfig { .setAllowLeadingZeroFileMode(allowLeadingZeroFileMode) .setAllowInvalidPersonIdent(allowInvalidPersonIdent) .setSafeForWindows(safeForWindows) - .setSafeForMacOS(safeForMacOS); + .setSafeForMacOS(safeForMacOS) + .setSkipList(skipList()); + } + + private ObjectIdSet skipList() { + if (fsckSkipList != null && !fsckSkipList.isEmpty()) { + return new LazyObjectIdSetFile(new File(fsckSkipList)); + } + return null; } /** diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/WalkFetchConnection.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/WalkFetchConnection.java index dfc3ee4c30..17edfdc4fb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/WalkFetchConnection.java +++ 
b/org.eclipse.jgit/src/org/eclipse/jgit/transport/WalkFetchConnection.java @@ -637,10 +637,11 @@ class WalkFetchConnection extends BaseFetchConnection { final byte[] raw = uol.getCachedBytes(); if (objCheck != null) { try { - objCheck.check(type, raw); + objCheck.check(id, type, raw); } catch (CorruptObjectException e) { - throw new TransportException(MessageFormat.format(JGitText.get().transportExceptionInvalid - , Constants.typeString(type), id.name(), e.getMessage())); + throw new TransportException(MessageFormat.format( + JGitText.get().transportExceptionInvalid, + Constants.typeString(type), id.name(), e.getMessage())); } } |