/* * Copyright (C) 2008-2010, Google Inc. * Copyright (C) 2008, Shawn O. Pearce * and other copyright owners as documented in the project's IP log. * * This program and the accompanying materials are made available * under the terms of the Eclipse Distribution License v1.0 which * accompanies this distribution, is reproduced below, and is * available at http://www.eclipse.org/org/documents/edl-v10.php * * All rights reserved. * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * - Neither the name of the Eclipse Foundation, Inc. nor the * names of its contributors may be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
package org.eclipse.jgit.lib;

import static org.eclipse.jgit.util.RawParseUtils.match;
import static org.eclipse.jgit.util.RawParseUtils.nextLF;
import static org.eclipse.jgit.util.RawParseUtils.parseBase10;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.text.MessageFormat;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.util.MutableInteger;
import org.eclipse.jgit.util.RawParseUtils;

/**
 * Verifies that an object is formatted correctly.
 * <p>
 * Verifications made by this class only check that the fields of an object are
 * formatted correctly. The ObjectId checksum of the object is not verified, and
 * connectivity links between objects are also not verified. It is assumed that
 * the caller can provide both of these validations on its own.
 * <p>
 * Instances of this class are not thread safe, but they may be reused to
 * perform multiple object validations.
 */
public class ObjectChecker {
    /** Header "tree " */
    public static final byte[] tree = Constants.encodeASCII("tree "); //$NON-NLS-1$

    /** Header "parent " */
    public static final byte[] parent = Constants.encodeASCII("parent "); //$NON-NLS-1$

    /** Header "author " */
    public static final byte[] author = Constants.encodeASCII("author "); //$NON-NLS-1$

    /** Header "committer " */
    public static final byte[] committer = Constants.encodeASCII("committer "); //$NON-NLS-1$

    /** Header "encoding " */
    public static final byte[] encoding = Constants.encodeASCII("encoding "); //$NON-NLS-1$

    /** Header "object " */
    public static final byte[] object = Constants.encodeASCII("object "); //$NON-NLS-1$

    /** Header "type " */
    public static final byte[] type = Constants.encodeASCII("type "); //$NON-NLS-1$

    /** Header "tag " */
    public static final byte[] tag = Constants.encodeASCII("tag "); //$NON-NLS-1$

    /** Header "tagger " */
    public static final byte[] tagger = Constants.encodeASCII("tagger "); //$NON-NLS-1$

    /** Scratch id reused by {@link #id(byte[], int)} to parse hex object ids. */
    private final MutableObjectId tempId = new MutableObjectId();

    /** Scratch out-parameter handed to {@code parseBase10}. */
    private final MutableInteger ptrout = new MutableInteger();

    private boolean allowZeroMode;

    private boolean windows;

    private boolean macosx;

    /**
     * Enable accepting leading zero mode in tree entries.
     * <p>
     * Some broken Git libraries generated leading zeros in the mode part of
     * tree entries. This is technically incorrect but gracefully allowed by
     * git-core. JGit rejects such trees by default, but may need to accept
     * them on broken histories.
     *
     * @param allow allow leading zero mode.
     * @return {@code this}.
     * @since 3.4
     */
    public ObjectChecker setAllowLeadingZeroFileMode(boolean allow) {
        allowZeroMode = allow;
        return this;
    }

    /**
     * Restrict trees to only names legal on Windows platforms.
     * <p>
     * Also rejects any mixed case forms of reserved names ({@code .git}).
     *
     * @param win true if Windows name checking should be performed.
     * @return {@code this}.
     * @since 3.4
     */
    public ObjectChecker setSafeForWindows(boolean win) {
        windows = win;
        return this;
    }

    /**
     * Restrict trees to only names legal on Mac OS X platforms.
     * <p>
     * Rejects any mixed case forms of reserved names ({@code .git})
     * for users working on HFS+ in case-insensitive (default) mode.
     *
     * @param mac true if Mac OS X name checking should be performed.
     * @return {@code this}.
     * @since 3.4
     */
    public ObjectChecker setSafeForMacOS(boolean mac) {
        macosx = mac;
        return this;
    }

    /**
     * Check an object for parsing errors.
     *
     * @param objType
     *            type of the object. Must be a valid object type code in
     *            {@link Constants}.
     * @param raw
     *            the raw data which comprises the object. This should be in the
     *            canonical format (that is the format used to generate the
     *            ObjectId of the object). The array is never modified.
     * @throws CorruptObjectException
     *             if an error is identified.
     */
    public void check(final int objType, final byte[] raw)
            throws CorruptObjectException {
        switch (objType) {
        case Constants.OBJ_COMMIT:
            checkCommit(raw);
            break;
        case Constants.OBJ_TAG:
            checkTag(raw);
            break;
        case Constants.OBJ_TREE:
            checkTree(raw);
            break;
        case Constants.OBJ_BLOB:
            checkBlob(raw);
            break;
        default:
            throw new CorruptObjectException(MessageFormat.format(
                    JGitText.get().corruptObjectInvalidType2,
                    Integer.valueOf(objType)));
        }
    }

    /**
     * Test whether the byte at {@code ptr} exists and is a line feed.
     * <p>
     * Header parsers use this instead of indexing {@code raw} directly so that
     * an object truncated right after a header value is reported as corrupt
     * rather than failing with an {@link ArrayIndexOutOfBoundsException}.
     *
     * @param raw buffer being parsed.
     * @param ptr position expected to hold the LF; must not be negative.
     * @return true only if {@code ptr} is inside the buffer and holds LF.
     */
    private static boolean isLF(final byte[] raw, final int ptr) {
        return ptr < raw.length && raw[ptr] == '\n';
    }

    /**
     * Parse a hex object id starting at {@code ptr}.
     *
     * @param raw buffer being parsed.
     * @param ptr position of the first hex digit.
     * @return position just past the id, or -1 if no valid id is present.
     */
    private int id(final byte[] raw, final int ptr) {
        // A complete id cannot fit; do not rely on the parser to notice.
        if (raw.length - ptr < Constants.OBJECT_ID_STRING_LENGTH)
            return -1;
        try {
            tempId.fromString(raw, ptr);
            return ptr + Constants.OBJECT_ID_STRING_LENGTH;
        } catch (IllegalArgumentException e) {
            return -1;
        }
    }

    /**
     * Validate the {@code name <email> when tz} portion of a person header.
     *
     * @param raw buffer being parsed.
     * @param ptr position just past the header keyword.
     * @return position just past the timezone field (this may equal
     *         {@code raw.length}), or -1 if the identity is malformed.
     */
    private int personIdent(final byte[] raw, int ptr) {
        // The checks on raw[x - 1] below rely on nextLF stopping just past
        // the requested character (or past an LF, or at end of buffer).
        final int emailB = nextLF(raw, ptr, '<');
        if (emailB == ptr || raw[emailB - 1] != '<')
            return -1;

        final int emailE = nextLF(raw, emailB, '>');
        if (emailE == emailB || raw[emailE - 1] != '>')
            return -1;
        if (emailE == raw.length || raw[emailE] != ' ')
            return -1;

        // Seed the out-parameter: should the parser return without updating
        // it, the stale value of a previous call must not be mistaken for
        // progress. An unchanged position reads as "no digits consumed".
        ptrout.value = emailE + 1;
        parseBase10(raw, emailE + 1, ptrout); // when
        ptr = ptrout.value;
        if (emailE + 1 == ptr)
            return -1;
        if (ptr == raw.length || raw[ptr] != ' ')
            return -1;

        ptrout.value = ptr + 1;
        parseBase10(raw, ptr + 1, ptrout); // tz offset
        if (ptr + 1 == ptrout.value)
            return -1;
        return ptrout.value;
    }

    /**
     * Check a commit for errors.
     *
     * @param raw
     *            the commit data. The array is never modified.
     * @throws CorruptObjectException
     *             if any error was detected.
     */
    public void checkCommit(final byte[] raw) throws CorruptObjectException {
        int ptr = 0;

        if ((ptr = match(raw, ptr, tree)) < 0)
            throw new CorruptObjectException("no tree header");
        if ((ptr = id(raw, ptr)) < 0 || !isLF(raw, ptr++))
            throw new CorruptObjectException("invalid tree");

        while (match(raw, ptr, parent) >= 0) {
            ptr += parent.length;
            if ((ptr = id(raw, ptr)) < 0 || !isLF(raw, ptr++))
                throw new CorruptObjectException("invalid parent");
        }

        if ((ptr = match(raw, ptr, author)) < 0)
            throw new CorruptObjectException("no author");
        if ((ptr = personIdent(raw, ptr)) < 0 || !isLF(raw, ptr++))
            throw new CorruptObjectException("invalid author");

        if ((ptr = match(raw, ptr, committer)) < 0)
            throw new CorruptObjectException("no committer");
        if ((ptr = personIdent(raw, ptr)) < 0 || !isLF(raw, ptr++))
            throw new CorruptObjectException("invalid committer");
    }

    /**
     * Check an annotated tag for errors.
     *
     * @param raw
     *            the tag data. The array is never modified.
     * @throws CorruptObjectException
     *             if any error was detected.
     */
    public void checkTag(final byte[] raw) throws CorruptObjectException {
        int ptr = 0;

        if ((ptr = match(raw, ptr, object)) < 0)
            throw new CorruptObjectException("no object header");
        if ((ptr = id(raw, ptr)) < 0 || !isLF(raw, ptr++))
            throw new CorruptObjectException("invalid object");

        if ((ptr = match(raw, ptr, type)) < 0)
            throw new CorruptObjectException("no type header");
        ptr = nextLF(raw, ptr);

        if ((ptr = match(raw, ptr, tag)) < 0)
            throw new CorruptObjectException("no tag header");
        ptr = nextLF(raw, ptr);

        // The tagger header is optional; it is validated only when present.
        if ((ptr = match(raw, ptr, tagger)) > 0) {
            if ((ptr = personIdent(raw, ptr)) < 0 || !isLF(raw, ptr++))
                throw new CorruptObjectException("invalid tagger");
        }
    }

    /**
     * Character that implicitly terminates a name of the given mode when
     * sorting: {@code '/'} for a tree entry, NUL for anything else.
     */
    private static int lastPathChar(final int mode) {
        return FileMode.TREE.equals(mode) ? '/' : '\0';
    }

    /**
     * Compare two entry names stored in the same buffer as unsigned bytes.
     * When one name is a prefix of the other, the shorter name is extended
     * with the terminator selected by its mode (see
     * {@link #lastPathChar(int)}).
     *
     * @return negative, zero or positive as name A sorts before, equal to or
     *         after name B.
     */
    private static int pathCompare(final byte[] raw, int aPos, final int aEnd,
            final int aMode, int bPos, final int bEnd, final int bMode) {
        while (aPos < aEnd && bPos < bEnd) {
            final int cmp = (raw[aPos++] & 0xff) - (raw[bPos++] & 0xff);
            if (cmp != 0)
                return cmp;
        }

        if (aPos < aEnd)
            return (raw[aPos] & 0xff) - lastPathChar(bMode);
        if (bPos < bEnd)
            return lastPathChar(aMode) - (raw[bPos] & 0xff);
        return 0;
    }

    /**
     * Scan the entries following the current one for a name that compares
     * equal to it when the current entry is treated as a tree.
     *
     * @param raw the raw tree data.
     * @param thisNamePos first byte of the current entry's name.
     * @param thisNameEnd position of the NUL ending the current entry's name.
     * @return true if a later entry has an equal name; false if none was
     *         found, the scan passed the point where one could sort, or the
     *         remaining data is truncated or has an empty name.
     */
    private static boolean duplicateName(final byte[] raw,
            final int thisNamePos, final int thisNameEnd) {
        final int sz = raw.length;
        // Skip the NUL terminator and the binary object id of this entry.
        int nextPtr = thisNameEnd + 1 + Constants.OBJECT_ID_LENGTH;
        for (;;) {
            int nextMode = 0;
            for (;;) {
                if (nextPtr >= sz)
                    return false;
                final byte c = raw[nextPtr++];
                if (' ' == c)
                    break;
                nextMode <<= 3;
                nextMode += c - '0';
            }

            final int nextNamePos = nextPtr;
            for (;;) {
                if (nextPtr == sz)
                    return false;
                final byte c = raw[nextPtr++];
                if (c == 0)
                    break;
            }
            if (nextNamePos + 1 == nextPtr)
                return false;

            final int cmp = pathCompare(raw, thisNamePos, thisNameEnd,
                    FileMode.TREE.getBits(), nextNamePos, nextPtr - 1,
                    nextMode);
            if (cmp < 0)
                return false;
            else if (cmp == 0)
                return true;

            nextPtr += Constants.OBJECT_ID_LENGTH;
        }
    }

    /**
     * Check a canonical formatted tree for errors.
     *
     * @param raw
     *            the raw tree data. The array is never modified.
     * @throws CorruptObjectException
     *             if any error was detected.
     */
    public void checkTree(final byte[] raw) throws CorruptObjectException {
        final int sz = raw.length;
        int ptr = 0;
        int lastNameB = 0, lastNameE = 0, lastMode = 0;
        // Only allocated when platform-insensitive duplicate detection is on.
        Set<String> normalized = windows || macosx
                ? new HashSet<String>()
                : null;

        while (ptr < sz) {
            int thisMode = 0;
            for (;;) {
                if (ptr == sz)
                    throw new CorruptObjectException("truncated in mode");
                final byte c = raw[ptr++];
                if (' ' == c)
                    break;
                if (c < '0' || c > '7')
                    throw new CorruptObjectException("invalid mode character");
                if (thisMode == 0 && c == '0' && !allowZeroMode)
                    throw new CorruptObjectException("mode starts with '0'");
                thisMode <<= 3;
                thisMode += c - '0';
            }

            if (FileMode.fromBits(thisMode).getObjectType() == Constants.OBJ_BAD)
                throw new CorruptObjectException("invalid mode " + thisMode);

            final int thisNameB = ptr;
            ptr = scanPathSegment(raw, ptr, sz);
            if (ptr == sz || raw[ptr] != 0)
                throw new CorruptObjectException("truncated in name");
            checkPathSegment2(raw, thisNameB, ptr);
            if (normalized != null) {
                if (!normalized.add(normalize(raw, thisNameB, ptr)))
                    throw new CorruptObjectException("duplicate entry names");
            } else if (duplicateName(raw, thisNameB, ptr))
                throw new CorruptObjectException("duplicate entry names");

            // A name can never begin at offset 0 (a mode always precedes it),
            // so 0 doubles as the "no previous entry" marker.
            if (lastNameB != 0) {
                final int cmp = pathCompare(raw, lastNameB, lastNameE,
                        lastMode, thisNameB, ptr, thisMode);
                if (cmp > 0)
                    throw new CorruptObjectException("incorrectly sorted");
            }

            lastNameB = thisNameB;
            lastNameE = ptr;
            lastMode = thisMode;

            ptr += 1 + Constants.OBJECT_ID_LENGTH;
            if (ptr > sz)
                throw new CorruptObjectException("truncated in object id");
        }
    }

    /**
     * Advance over a path segment until a NUL byte or {@code end}.
     *
     * @return position of the first NUL byte found, or {@code end} if none.
     * @throws CorruptObjectException
     *             the segment contains '/', or (when Windows checking is
     *             enabled) a byte that is not permitted on Windows.
     */
    private int scanPathSegment(byte[] raw, int ptr, int end)
            throws CorruptObjectException {
        for (; ptr < end; ptr++) {
            byte c = raw[ptr];
            if (c == 0)
                return ptr;
            if (c == '/')
                throw new CorruptObjectException("name contains '/'");
            if (windows && isInvalidOnWindows(c)) {
                if (c > 31)
                    throw new CorruptObjectException(String.format(
                            "name contains '%c'", c));
                throw new CorruptObjectException(String.format(
                        "name contains byte 0x%x", c & 0xff));
            }
        }
        return ptr;
    }

    /**
     * Check tree path entry for validity.
     *
     * @param raw buffer to scan.
     * @param ptr offset to first byte of the name.
     * @param end offset to one past last byte of name.
     * @throws CorruptObjectException name is invalid.
     * @since 3.4
     */
    public void checkPathSegment(byte[] raw, int ptr, int end)
            throws CorruptObjectException {
        int e = scanPathSegment(raw, ptr, end);
        if (e < end && raw[e] == 0)
            throw new CorruptObjectException("name contains byte 0x00");
        checkPathSegment2(raw, ptr, end);
    }

    /**
     * Reject empty names, {@code .}, {@code ..}, {@code .git} (in any case,
     * and followed only by dots/spaces as accepted by
     * {@link #isNormalizedGit(byte[], int, int)}), and, when Windows checking
     * is enabled, names ending in space or dot and reserved device names.
     */
    private void checkPathSegment2(byte[] raw, int ptr, int end)
            throws CorruptObjectException {
        if (ptr == end)
            throw new CorruptObjectException("zero length name");
        if (raw[ptr] == '.') {
            switch (end - ptr) {
            case 1:
                throw new CorruptObjectException("invalid name '.'");
            case 2:
                if (raw[ptr + 1] == '.')
                    throw new CorruptObjectException("invalid name '..'");
                break;
            case 4:
                if (isGit(raw, ptr + 1))
                    throw new CorruptObjectException(String.format(
                            "invalid name '%s'",
                            RawParseUtils.decode(raw, ptr, end)));
                break;
            default:
                if (end - ptr > 4 && isNormalizedGit(raw, ptr + 1, end))
                    throw new CorruptObjectException(String.format(
                            "invalid name '%s'",
                            RawParseUtils.decode(raw, ptr, end)));
            }
        }

        if (windows) {
            // Windows ignores space and dot at end of file name.
            if (raw[end - 1] == ' ' || raw[end - 1] == '.')
                throw new CorruptObjectException("invalid name ends with '"
                        + ((char) raw[end - 1]) + "'");
            if (end - ptr >= 3)
                checkNotWindowsDevice(raw, ptr, end);
        }
    }

    /**
     * Reject the names AUX, CON, COM1-9, LPT1-9, NUL and PRN, matched case
     * insensitively, either as the whole name or followed by a '.'.
     * Callers must pass a name of at least 3 bytes.
     */
    private static void checkNotWindowsDevice(byte[] raw, int ptr, int end)
            throws CorruptObjectException {
        switch (toLower(raw[ptr])) {
        case 'a': // AUX
            if (end - ptr >= 3
                    && toLower(raw[ptr + 1]) == 'u'
                    && toLower(raw[ptr + 2]) == 'x'
                    && (end - ptr == 3 || raw[ptr + 3] == '.'))
                throw new CorruptObjectException("invalid name 'AUX'");
            break;

        case 'c': // CON, COM[1-9]
            if (end - ptr >= 3
                    && toLower(raw[ptr + 2]) == 'n'
                    && toLower(raw[ptr + 1]) == 'o'
                    && (end - ptr == 3 || raw[ptr + 3] == '.'))
                throw new CorruptObjectException("invalid name 'CON'");
            if (end - ptr >= 4
                    && toLower(raw[ptr + 2]) == 'm'
                    && toLower(raw[ptr + 1]) == 'o'
                    && isPositiveDigit(raw[ptr + 3])
                    && (end - ptr == 4 || raw[ptr + 4] == '.'))
                throw new CorruptObjectException("invalid name 'COM"
                        + ((char) raw[ptr + 3]) + "'");
            break;

        case 'l': // LPT[1-9]
            if (end - ptr >= 4
                    && toLower(raw[ptr + 1]) == 'p'
                    && toLower(raw[ptr + 2]) == 't'
                    && isPositiveDigit(raw[ptr + 3])
                    && (end - ptr == 4 || raw[ptr + 4] == '.'))
                throw new CorruptObjectException("invalid name 'LPT"
                        + ((char) raw[ptr + 3]) + "'");
            break;

        case 'n': // NUL
            if (end - ptr >= 3
                    && toLower(raw[ptr + 1]) == 'u'
                    && toLower(raw[ptr + 2]) == 'l'
                    && (end - ptr == 3 || raw[ptr + 3] == '.'))
                throw new CorruptObjectException("invalid name 'NUL'");
            break;

        case 'p': // PRN
            if (end - ptr >= 3
                    && toLower(raw[ptr + 1]) == 'r'
                    && toLower(raw[ptr + 2]) == 'n'
                    && (end - ptr == 3 || raw[ptr + 3] == '.'))
                throw new CorruptObjectException("invalid name 'PRN'");
            break;
        }
    }

    /**
     * @return true for the bytes 1 through 31 and for the characters
     *         {@code " * : < > ? \ |}.
     */
    private static boolean isInvalidOnWindows(byte c) {
        // Windows disallows "special" characters in a path component.
        switch (c) {
        case '"':
        case '*':
        case ':':
        case '<':
        case '>':
        case '?':
        case '\\':
        case '|':
            return true;
        }
        return 1 <= c && c <= 31;
    }

    /**
     * @return true if the three bytes at {@code p} spell "git", ignoring
     *         ASCII case. The caller guarantees three bytes are available.
     */
    private static boolean isGit(byte[] buf, int p) {
        return toLower(buf[p]) == 'g'
                && toLower(buf[p + 1]) == 'i'
                && toLower(buf[p + 2]) == 't';
    }

    /**
     * Test for "git" followed only by trailing dots and spaces.
     *
     * @param raw buffer holding the name.
     * @param ptr position just past the leading '.' of the name.
     * @param end one past the last byte of the name.
     * @return true if the name is "git" (any case) followed exclusively by
     *         '.' and ' ' bytes, of which exactly one is a dot or at least
     *         one is a space.
     */
    private static boolean isNormalizedGit(byte[] raw, int ptr, int end) {
        if (isGit(raw, ptr)) {
            int dots = 0;
            boolean space = false;
            int p = end - 1;
            // Walk backwards over the suffix until reaching the 't' of "git".
            for (; (ptr + 2) < p; p--) {
                if (raw[p] == '.')
                    dots++;
                else if (raw[p] == ' ')
                    space = true;
                else
                    break;
            }
            return p == ptr + 2 && (dots == 1 || space);
        }
        return false;
    }

    /** Lower-case an ASCII letter; every other byte is returned unchanged. */
    private static char toLower(byte b) {
        if ('A' <= b && b <= 'Z')
            return (char) (b + ('a' - 'A'));
        return (char) b;
    }

    /** @return true if {@code b} is one of the ASCII digits '1' to '9'. */
    private static boolean isPositiveDigit(byte b) {
        return '1' <= b && b <= '9';
    }

    /**
     * Check a blob for errors.
     *
     * @param raw
     *            the blob data. The array is never modified.
     * @throws CorruptObjectException
     *             if any error was detected.
     */
    public void checkBlob(final byte[] raw) throws CorruptObjectException {
        // We can always assume the blob is valid.
    }

    /**
     * Build the key used for platform-insensitive duplicate detection: the
     * decoded name lower-cased with {@link Locale#US}, and additionally run
     * through {@link Normalizer} when Mac OS X checking is enabled.
     */
    private String normalize(byte[] raw, int ptr, int end) {
        String n = RawParseUtils.decode(raw, ptr, end).toLowerCase(Locale.US);
        return macosx ? Normalizer.normalize(n) : n;
    }

    /**
     * Reflective bridge to {@code java.text.Normalizer} using form NFC. If
     * the class, field or method cannot be resolved, strings are returned
     * unchanged.
     */
    private static class Normalizer {
        // TODO Simplify invocation to Normalizer after dropping Java 5.
        private static final Method normalize;
        private static final Object nfc;
        static {
            Method method;
            Object formNfc;
            try {
                Class<?> formClazz = Class.forName("java.text.Normalizer$Form"); //$NON-NLS-1$
                formNfc = formClazz.getField("NFC").get(null); //$NON-NLS-1$
                method = Class.forName("java.text.Normalizer") //$NON-NLS-1$
                        .getMethod("normalize", CharSequence.class, formClazz); //$NON-NLS-1$
            } catch (ClassNotFoundException e) {
                method = null;
                formNfc = null;
            } catch (NoSuchFieldException e) {
                method = null;
                formNfc = null;
            } catch (NoSuchMethodException e) {
                method = null;
                formNfc = null;
            } catch (SecurityException e) {
                method = null;
                formNfc = null;
            } catch (IllegalArgumentException e) {
                method = null;
                formNfc = null;
            } catch (IllegalAccessException e) {
                method = null;
                formNfc = null;
            }
            normalize = method;
            nfc = formNfc;
        }

        /**
         * @param in string to normalize.
         * @return the result of the reflective call, or {@code in} itself
         *         when normalization is unavailable or the call fails with a
         *         checked exception. Runtime exceptions and errors raised by
         *         the target are rethrown.
         */
        static String normalize(String in) {
            if (normalize == null)
                return in;
            try {
                return (String) normalize.invoke(null, in, nfc);
            } catch (IllegalAccessException e) {
                return in;
            } catch (InvocationTargetException e) {
                if (e.getCause() instanceof RuntimeException)
                    throw (RuntimeException) e.getCause();
                if (e.getCause() instanceof Error)
                    throw (Error) e.getCause();
                return in;
            }
        }
    }
}