12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254 |
- /*
- * Copyright (C) 2008-2010, Google Inc.
- * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
- * and other copyright owners as documented in the project's IP log.
- *
- * This program and the accompanying materials are made available
- * under the terms of the Eclipse Distribution License v1.0 which
- * accompanies this distribution, is reproduced below, and is
- * available at http://www.eclipse.org/org/documents/edl-v10.php
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * - Neither the name of the Eclipse Foundation, Inc. nor the
- * names of its contributors may be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
- package org.eclipse.jgit.lib;
-
- import static org.eclipse.jgit.lib.Constants.DOT_GIT_MODULES;
- import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
- import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH;
- import static org.eclipse.jgit.lib.Constants.OBJ_BAD;
- import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
- import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
- import static org.eclipse.jgit.lib.Constants.OBJ_TAG;
- import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_DATE;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_EMAIL;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_OBJECT_SHA1;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_PARENT_SHA1;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TIMEZONE;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TREE_SHA1;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_UTF8;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.DUPLICATE_ENTRIES;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.EMPTY_NAME;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.FULL_PATHNAME;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOT;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTDOT;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTGIT;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_AUTHOR;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_COMMITTER;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_EMAIL;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_OBJECT;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_SPACE_BEFORE_DATE;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TAG_ENTRY;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TREE;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TYPE_ENTRY;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.NULL_SHA1;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.TREE_NOT_SORTED;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.UNKNOWN_TYPE;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.WIN32_BAD_NAME;
- import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.ZERO_PADDED_FILEMODE;
- import static org.eclipse.jgit.util.Paths.compare;
- import static org.eclipse.jgit.util.Paths.compareSameName;
- import static org.eclipse.jgit.util.RawParseUtils.nextLF;
- import static org.eclipse.jgit.util.RawParseUtils.parseBase10;
-
- import java.text.MessageFormat;
- import java.text.Normalizer;
- import java.util.ArrayList;
- import java.util.EnumSet;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Locale;
- import java.util.Set;
-
- import org.eclipse.jgit.annotations.NonNull;
- import org.eclipse.jgit.annotations.Nullable;
- import org.eclipse.jgit.errors.CorruptObjectException;
- import org.eclipse.jgit.internal.JGitText;
- import org.eclipse.jgit.util.MutableInteger;
- import org.eclipse.jgit.util.RawParseUtils;
- import org.eclipse.jgit.util.StringUtils;
-
- /**
- * Verifies that an object is formatted correctly.
- * <p>
- * Verifications made by this class only check that the fields of an object are
- * formatted correctly. The ObjectId checksum of the object is not verified, and
- * connectivity links between objects are also not verified. It's assumed that
- * the caller can provide both of these validations on its own.
- * <p>
- * Instances of this class are not thread safe, but they may be reused to
- * perform multiple object validations.
- */
- public class ObjectChecker {
- /** ASCII bytes of the header {@code "tree "}; the first header looked for when checking a commit. */
- public static final byte[] tree = Constants.encodeASCII("tree "); //$NON-NLS-1$
-
- /** ASCII bytes of the header {@code "parent "}; matched repeatedly after the tree header of a commit. */
- public static final byte[] parent = Constants.encodeASCII("parent "); //$NON-NLS-1$
-
- /** ASCII bytes of the header {@code "author "}; its value is validated as a person identity. */
- public static final byte[] author = Constants.encodeASCII("author "); //$NON-NLS-1$
-
- /** ASCII bytes of the header {@code "committer "}; its value is validated as a person identity. */
- public static final byte[] committer = Constants.encodeASCII("committer "); //$NON-NLS-1$
-
- /** ASCII bytes of the header {@code "encoding "}. */
- public static final byte[] encoding = Constants.encodeASCII("encoding "); //$NON-NLS-1$
-
- /** ASCII bytes of the header {@code "object "}; the first header looked for when checking a tag. */
- public static final byte[] object = Constants.encodeASCII("object "); //$NON-NLS-1$
-
- /** ASCII bytes of the header {@code "type "}; looked for after the object header of a tag. */
- public static final byte[] type = Constants.encodeASCII("type "); //$NON-NLS-1$
-
- /** ASCII bytes of the header {@code "tag "}; looked for after the type header of a tag. */
- public static final byte[] tag = Constants.encodeASCII("tag "); //$NON-NLS-1$
-
- /** ASCII bytes of the header {@code "tagger "}; optional in a tag, validated as a person identity when present. */
- public static final byte[] tagger = Constants.encodeASCII("tagger "); //$NON-NLS-1$
-
- /** ASCII bytes of the path {@code ".gitmodules"}; the reference name used for the Mac HFS+ gitmodules comparison. */
- private static final byte[] dotGitmodules = Constants.encodeASCII(DOT_GIT_MODULES);
-
- /**
-  * Kinds of problems the checker is able to detect.
-  * <p>
-  * Each constant can be switched off individually through
-  * {@code setIgnore}; the declaration order below is part of the enum's
-  * identity and must not be changed.
-  *
-  * @since 4.2
-  */
- public enum ErrorType {
-     // @formatter:off
-     // These names match git-core so that fsck section keys also match.
-     /***/ NULL_SHA1,
-     /***/ DUPLICATE_ENTRIES,
-     /***/ TREE_NOT_SORTED,
-     /***/ ZERO_PADDED_FILEMODE,
-     /***/ EMPTY_NAME,
-     /***/ FULL_PATHNAME,
-     /***/ HAS_DOT,
-     /***/ HAS_DOTDOT,
-     /***/ HAS_DOTGIT,
-     /***/ BAD_OBJECT_SHA1,
-     /***/ BAD_PARENT_SHA1,
-     /***/ BAD_TREE_SHA1,
-     /***/ MISSING_AUTHOR,
-     /***/ MISSING_COMMITTER,
-     /***/ MISSING_OBJECT,
-     /***/ MISSING_TREE,
-     /***/ MISSING_TYPE_ENTRY,
-     /***/ MISSING_TAG_ENTRY,
-     /***/ BAD_DATE,
-     /***/ BAD_EMAIL,
-     /***/ BAD_TIMEZONE,
-     /***/ MISSING_EMAIL,
-     /***/ MISSING_SPACE_BEFORE_DATE,
-     /***/ UNKNOWN_TYPE,
-
-     // These are unique to JGit.
-     /***/ WIN32_BAD_NAME,
-     /***/ BAD_UTF8;
-     // @formatter:on
-
-     /**
-      * Convert the constant name into a camelCase identifier.
-      * <p>
-      * Underscores are dropped and the character following each underscore
-      * is copied unchanged; every other character is lower-cased. For
-      * example {@code NULL_SHA1} becomes {@code nullSha1}.
-      *
-      * @return camelCaseVersion of the name.
-      */
-     public String getMessageId() {
-         final String constantName = name();
-         final int length = constantName.length();
-         final StringBuilder camel = new StringBuilder(length);
-         int pos = 0;
-         while (pos < length) {
-             final char current = constantName.charAt(pos++);
-             if (current == '_') {
-                 // Consume the character after the separator verbatim.
-                 camel.append(constantName.charAt(pos++));
-             } else {
-                 camel.append(StringUtils.toLowerCase(current));
-             }
-         }
-         return camel.toString();
-     }
- }
-
- private final MutableObjectId tempId = new MutableObjectId(); // scratch id that checkId parses hex object names into
- private final MutableInteger bufPtr = new MutableInteger(); // current read offset shared by the commit and tag header checks
-
- private EnumSet<ErrorType> errors = EnumSet.allOf(ErrorType.class); // error types that are reported; ignored types are removed from this set
- private ObjectIdSet skipList; // objects whose errors are never reported; null when no skip list is configured
- private boolean allowInvalidPersonIdent; // when true, person identity lines are skipped without validation
- private boolean windows; // enables the Windows specific name checks
- private boolean macosx; // enables the Mac OS X specific name checks
-
- private final List<GitmoduleEntry> gitsubmodules = new ArrayList<>(); // (tree id, blob id) pairs recorded by checkTree for entries recognized as .gitmodules
-
- /**
- * Enable accepting specific malformed (but not horribly broken) objects.
- * <p>
- * An error found in an object whose id is contained in this set is not
- * reported. The set is stored by reference; it is not copied.
- *
- * @param objects
- * collection of object names known to be broken in a non-fatal
- * way that should be ignored by the checker; {@code null}
- * removes any previously configured skip list.
- * @return {@code this}
- * @since 4.2
- */
- public ObjectChecker setSkipList(@Nullable ObjectIdSet objects) {
- skipList = objects;
- return this;
- }
-
- /**
-  * Replace the set of error types that are ignored for every object.
-  * <p>
-  * Any earlier ignore configuration is discarded: after this call exactly
-  * the types listed in {@code ids} are ignored and all others are reported.
-  *
-  * @param ids
-  *            error types to ignore; {@code null} ignores nothing. The
-  *            caller's set is copied, not retained.
-  * @return {@code this}
-  * @since 4.2
-  */
- public ObjectChecker setIgnore(@Nullable Set<ErrorType> ids) {
-     final EnumSet<ErrorType> reported = EnumSet.allOf(ErrorType.class);
-     if (ids != null) {
-         reported.removeAll(ids);
-     }
-     errors = reported;
-     return this;
- }
-
- /**
-  * Switch a single error type between ignored and reported, for all
-  * objects.
-  *
-  * @param id
-  *            error type to configure.
-  * @param ignore
-  *            true to ignore this error; false to treat the error as an
-  *            error and throw.
-  * @return {@code this}
-  * @since 4.2
-  */
- public ObjectChecker setIgnore(ErrorType id, boolean ignore) {
-     if (!ignore) {
-         // Reported types are the ones present in the set.
-         errors.add(id);
-         return this;
-     }
-     errors.remove(id);
-     return this;
- }
-
- /**
- * Enable accepting leading zero mode in tree entries.
- * <p>
- * Some broken Git libraries generated leading zeros in the mode part of
- * tree entries. This is technically incorrect but gracefully allowed by
- * git-core. JGit rejects such trees by default, but may need to accept
- * them on broken histories.
- * <p>
- * Same as {@code setIgnore(ZERO_PADDED_FILEMODE, allow)}.
- *
- * @param allow
- * true to accept tree entries whose mode starts with a zero;
- * false to report them as {@code ZERO_PADDED_FILEMODE}.
- * @return {@code this}.
- * @since 3.4
- */
- public ObjectChecker setAllowLeadingZeroFileMode(boolean allow) {
- return setIgnore(ZERO_PADDED_FILEMODE, allow);
- }
-
- /**
- * Enable accepting invalid author, committer and tagger identities.
- * <p>
- * Some broken Git versions/libraries allowed users to create commits and
- * tags with invalid formatting between the name, email and timestamp.
- * <p>
- * When enabled, the content of an identity line is not inspected at all;
- * the checker only skips to the end of the line.
- *
- * @param allow
- * if true accept invalid person identity strings.
- * @return {@code this}.
- * @since 4.0
- */
- public ObjectChecker setAllowInvalidPersonIdent(boolean allow) {
- allowInvalidPersonIdent = allow;
- return this;
- }
-
- /**
- * Restrict trees to only names legal on Windows platforms.
- * <p>
- * Also rejects any mixed case forms of reserved names ({@code .git}).
- * <p>
- * When enabled, names ending in a space or a dot are reported as
- * {@code WIN32_BAD_NAME}, and duplicate entries in a tree are detected
- * by comparing normalized names.
- *
- * @param win
- * true if Windows name checking should be performed.
- * @return {@code this}.
- * @since 3.4
- */
- public ObjectChecker setSafeForWindows(boolean win) {
- windows = win;
- return this;
- }
-
- /**
- * Restrict trees to only names legal on Mac OS X platforms.
- * <p>
- * Rejects any mixed case forms of reserved names ({@code .git})
- * for users working on HFS+ in case-insensitive (default) mode.
- * <p>
- * When enabled, names that match {@code .git} once ignorable Unicode
- * characters are removed are reported as {@code HAS_DOTGIT}, and
- * duplicate entries in a tree are detected by comparing normalized names.
- *
- * @param mac
- * true if Mac OS X name checking should be performed.
- * @return {@code this}.
- * @since 3.4
- */
- public ObjectChecker setSafeForMacOS(boolean mac) {
- macosx = mac;
- return this;
- }
-
- /**
- * Check an object for parsing errors.
- * <p>
- * The object's id is computed from {@code objType} and {@code raw} only
- * when a skip list has been configured; otherwise the check runs without
- * an id.
- *
- * @param objType
- * type of the object. Must be a valid object type code in
- * {@link org.eclipse.jgit.lib.Constants}.
- * @param raw
- * the raw data which comprises the object. This should be in the
- * canonical format (that is the format used to generate the
- * ObjectId of the object). The array is never modified.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * if an error is identified.
- */
- public void check(int objType, byte[] raw)
- throws CorruptObjectException {
- check(idFor(objType, raw), objType, raw);
- }
-
- /**
- * Check an object for parsing errors.
- * <p>
- * Dispatches on {@code objType} to the commit, tag, tree or blob check.
- * Any other type code is reported as {@code UNKNOWN_TYPE}.
- *
- * @param id
- * identity of the object being checked; may be {@code null}.
- * @param objType
- * type of the object. Must be a valid object type code in
- * {@link org.eclipse.jgit.lib.Constants}.
- * @param raw
- * the raw data which comprises the object. This should be in the
- * canonical format (that is the format used to generate the
- * ObjectId of the object). The array is never modified.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * if an error is identified.
- * @since 4.2
- */
- public void check(@Nullable AnyObjectId id, int objType, byte[] raw)
- throws CorruptObjectException {
- switch (objType) {
- case OBJ_COMMIT:
- checkCommit(id, raw);
- break;
- case OBJ_TAG:
- checkTag(id, raw);
- break;
- case OBJ_TREE:
- checkTree(id, raw);
- break;
- case OBJ_BLOB:
- BlobObjectChecker checker = newBlobObjectChecker();
- if (checker == null) { // no dedicated blob checker available: fall back to checkBlob
- checkBlob(raw);
- } else {
- checker.update(raw, 0, raw.length); // feed the whole blob in a single call
- checker.endBlob(id);
- }
- break;
- default:
- report(UNKNOWN_TYPE, id, MessageFormat.format(
- JGitText.get().corruptObjectInvalidType2,
- Integer.valueOf(objType)));
- }
- }
-
- /**
-  * Validate the hex object id located at the current {@code bufPtr}
-  * position and require it to be terminated by a line feed.
-  * <p>
-  * On success {@code bufPtr} is moved to the first byte after the line
-  * feed. On failure {@code bufPtr} is moved to the position returned by
-  * {@code nextLF}, so the caller can continue with the following header.
-  *
-  * @param raw
-  *            buffer being parsed. The array is never modified.
-  * @return true if a well formed id immediately followed by {@code '\n'}
-  *         was found; false otherwise, including when the buffer ends
-  *         directly after the id.
-  */
- private boolean checkId(byte[] raw) {
-     int p = bufPtr.value;
-     try {
-         tempId.fromString(raw, p);
-     } catch (IllegalArgumentException e) {
-         bufPtr.value = nextLF(raw, p);
-         return false;
-     }
-
-     p += OBJECT_ID_STRING_LENGTH;
-     // A buffer that ends right after the hex digits has no byte at p;
-     // treat that as a malformed id instead of reading out of bounds.
-     if (p < raw.length && raw[p] == '\n') {
-         bufPtr.value = p + 1;
-         return true;
-     }
-     bufPtr.value = nextLF(raw, p);
-     return false;
- }
-
- /**
-  * Validate one person identity line (the value of an author, committer
-  * or tagger header) starting at the current {@code bufPtr} position.
-  * <p>
-  * The expected layout is {@code name <email> when tz} followed by a line
-  * feed. The first violation found is reported and the rest of the line is
-  * skipped. If invalid identities are allowed the line is skipped without
-  * any inspection. In every case {@code bufPtr} is left at the position
-  * returned by {@code nextLF}, or just past the terminating line feed.
-  *
-  * @param raw
-  *            buffer being parsed. The array is never modified.
-  * @param id
-  *            identity of the object being checked; may be {@code null}.
-  * @throws CorruptObjectException
-  *             if a violation is found and its error type is not ignored.
-  */
- private void checkPersonIdent(byte[] raw, @Nullable AnyObjectId id)
-         throws CorruptObjectException {
-     if (allowInvalidPersonIdent) {
-         bufPtr.value = nextLF(raw, bufPtr.value);
-         return;
-     }
-
-     final int emailB = nextLF(raw, bufPtr.value, '<');
-     if (emailB == bufPtr.value || raw[emailB - 1] != '<') {
-         report(MISSING_EMAIL, id, JGitText.get().corruptObjectMissingEmail);
-         bufPtr.value = nextLF(raw, bufPtr.value);
-         return;
-     }
-
-     final int emailE = nextLF(raw, emailB, '>');
-     if (emailE == emailB || raw[emailE - 1] != '>') {
-         report(BAD_EMAIL, id, JGitText.get().corruptObjectBadEmail);
-         bufPtr.value = nextLF(raw, bufPtr.value);
-         return;
-     }
-     if (emailE == raw.length || raw[emailE] != ' ') {
-         report(MISSING_SPACE_BEFORE_DATE, id,
-                 JGitText.get().corruptObjectBadDate);
-         bufPtr.value = nextLF(raw, bufPtr.value);
-         return;
-     }
-
-     parseBase10(raw, emailE + 1, bufPtr); // when
-     if (emailE + 1 == bufPtr.value || bufPtr.value == raw.length
-             || raw[bufPtr.value] != ' ') {
-         report(BAD_DATE, id, JGitText.get().corruptObjectBadDate);
-         bufPtr.value = nextLF(raw, bufPtr.value);
-         return;
-     }
-
-     int p = bufPtr.value + 1;
-     parseBase10(raw, p, bufPtr); // tz offset
-     if (p == bufPtr.value) {
-         report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
-         bufPtr.value = nextLF(raw, bufPtr.value);
-         return;
-     }
-
-     p = bufPtr.value;
-     // The timezone must be terminated by a line feed. If the buffer ends
-     // right after the digits there is no byte at p, so report the line as
-     // malformed instead of reading out of bounds.
-     if (p < raw.length && raw[p] == '\n') {
-         bufPtr.value = p + 1;
-     } else {
-         report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
-         bufPtr.value = nextLF(raw, p);
-     }
- }
-
- /**
- * Check a commit for errors.
- * <p>
- * The commit's id is computed from {@code raw} only when a skip list has
- * been configured; otherwise the check runs without an id.
- *
- * @param raw
- * the commit data. The array is never modified.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * if any error was detected.
- */
- public void checkCommit(byte[] raw) throws CorruptObjectException {
- checkCommit(idFor(OBJ_COMMIT, raw), raw);
- }
-
- /**
- * Check a commit for errors.
- * <p>
- * The headers are examined in this order: one tree, any number of
- * parents, one author, one committer. Nothing after the committer line
- * is inspected.
- *
- * @param id
- * identity of the object being checked; may be {@code null}.
- * @param raw
- * the commit data. The array is never modified.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * if any error was detected.
- * @since 4.2
- */
- public void checkCommit(@Nullable AnyObjectId id, byte[] raw)
- throws CorruptObjectException {
- bufPtr.value = 0; // always parse from the start of the buffer
-
- if (!match(raw, tree)) {
- report(MISSING_TREE, id, JGitText.get().corruptObjectNotreeHeader);
- } else if (!checkId(raw)) {
- report(BAD_TREE_SHA1, id, JGitText.get().corruptObjectInvalidTree);
- }
-
- while (match(raw, parent)) { // zero or more parent headers
- if (!checkId(raw)) {
- report(BAD_PARENT_SHA1, id,
- JGitText.get().corruptObjectInvalidParent);
- }
- }
-
- if (match(raw, author)) {
- checkPersonIdent(raw, id);
- } else {
- report(MISSING_AUTHOR, id, JGitText.get().corruptObjectNoAuthor);
- }
-
- if (match(raw, committer)) {
- checkPersonIdent(raw, id);
- } else {
- report(MISSING_COMMITTER, id,
- JGitText.get().corruptObjectNoCommitter);
- }
- }
-
- /**
- * Check an annotated tag for errors.
- * <p>
- * The tag's id is computed from {@code raw} only when a skip list has
- * been configured; otherwise the check runs without an id.
- *
- * @param raw
- * the tag data. The array is never modified.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * if any error was detected.
- */
- public void checkTag(byte[] raw) throws CorruptObjectException {
- checkTag(idFor(OBJ_TAG, raw), raw);
- }
-
- /**
- * Check an annotated tag for errors.
- * <p>
- * The headers are examined in this order: object, type, tag and an
- * optional tagger. Only the presence of the type and tag headers is
- * checked; their values are skipped without validation. A missing tagger
- * header is not an error.
- *
- * @param id
- * identity of the object being checked; may be {@code null}.
- * @param raw
- * the tag data. The array is never modified.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * if any error was detected.
- * @since 4.2
- */
- public void checkTag(@Nullable AnyObjectId id, byte[] raw)
- throws CorruptObjectException {
- bufPtr.value = 0; // always parse from the start of the buffer
- if (!match(raw, object)) {
- report(MISSING_OBJECT, id,
- JGitText.get().corruptObjectNoObjectHeader);
- } else if (!checkId(raw)) {
- report(BAD_OBJECT_SHA1, id,
- JGitText.get().corruptObjectInvalidObject);
- }
-
- if (!match(raw, type)) {
- report(MISSING_TYPE_ENTRY, id,
- JGitText.get().corruptObjectNoTypeHeader);
- }
- bufPtr.value = nextLF(raw, bufPtr.value); // skip the rest of the line; the type value is not validated
-
- if (!match(raw, tag)) {
- report(MISSING_TAG_ENTRY, id,
- JGitText.get().corruptObjectNoTagHeader);
- }
- bufPtr.value = nextLF(raw, bufPtr.value); // skip the rest of the line; the tag name is not validated
-
- if (match(raw, tagger)) { // tagger is optional
- checkPersonIdent(raw, id);
- }
- }
-
- /**
-  * Look ahead in a raw tree for a later entry that has the same name as
-  * the current one.
-  * <p>
-  * The scan walks the entries following the current one and stops as soon
-  * as the tree ends, an entry is truncated or has an empty name, or the
-  * name comparison says no later entry can match.
-  *
-  * @param raw
-  *            the raw tree data.
-  * @param thisNamePos
-  *            offset of the first byte of the current entry's name.
-  * @param thisNameEnd
-  *            offset of the NUL byte terminating the current entry's name.
-  * @return true if a following entry compares equal by name.
-  */
- private static boolean duplicateName(final byte[] raw,
-         final int thisNamePos, final int thisNameEnd) {
-     final int limit = raw.length;
-     // Step over the NUL terminator and the object id of the current entry.
-     int cursor = thisNameEnd + 1 + Constants.OBJECT_ID_LENGTH;
-     while (true) {
-         // Octal mode, terminated by a space.
-         int mode = 0;
-         while (true) {
-             if (cursor >= limit) {
-                 return false;
-             }
-             final byte digit = raw[cursor++];
-             if (digit == ' ') {
-                 break;
-             }
-             mode = (mode << 3) + (digit - '0');
-         }
-
-         // Name, terminated by a NUL byte.
-         final int nameStart = cursor;
-         int nameEnd = nameStart;
-         while (nameEnd < limit && raw[nameEnd] != 0) {
-             nameEnd++;
-         }
-         if (nameEnd == limit) {
-             return false;
-         }
-         if (nameEnd == nameStart) {
-             return false;
-         }
-
-         final int order = compareSameName(
-                 raw, thisNamePos, thisNameEnd,
-                 raw, nameStart, nameEnd, mode);
-         if (order == 0) {
-             return true;
-         }
-         if (order < 0) {
-             return false;
-         }
-
-         cursor = nameEnd + 1 + Constants.OBJECT_ID_LENGTH;
-     }
- }
-
- /**
- * Check a canonical formatted tree for errors.
- * <p>
- * The tree's id is computed from {@code raw} only when a skip list has
- * been configured; otherwise the check runs without an id.
- *
- * @param raw
- * the raw tree data. The array is never modified.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * if any error was detected.
- */
- public void checkTree(byte[] raw) throws CorruptObjectException {
- checkTree(idFor(OBJ_TREE, raw), raw);
- }
-
- /**
- * Check a canonical formatted tree for errors.
- * <p>
- * Each entry is parsed as an octal mode, a space, a NUL terminated name
- * and a raw object id. Structural damage (truncated mode, non-octal mode
- * character, unknown mode, truncated name, truncated object id) always
- * throws and cannot be ignored. Leading zero modes, bad names, duplicate
- * names, wrong sort order and the zero id go through the ignorable error
- * reporting instead.
- * <p>
- * When {@code id} is not null, every entry recognized as a
- * {@code .gitmodules} file is recorded together with {@code id}.
- *
- * @param id
- * identity of the object being checked; may be {@code null}.
- * @param raw
- * the raw tree data. The array is never modified.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * if any error was detected.
- * @since 4.2
- */
- public void checkTree(@Nullable AnyObjectId id, byte[] raw)
- throws CorruptObjectException {
- final int sz = raw.length;
- int ptr = 0;
- int lastNameB = 0, lastNameE = 0, lastMode = 0;
- Set<String> normalized = windows || macosx // with platform checks on, duplicates are found via normalized names
- ? new HashSet<>()
- : null;
-
- while (ptr < sz) {
- int thisMode = 0;
- for (;;) { // accumulate the octal mode up to the separating space
- if (ptr == sz) {
- throw new CorruptObjectException(
- JGitText.get().corruptObjectTruncatedInMode);
- }
- final byte c = raw[ptr++];
- if (' ' == c)
- break;
- if (c < '0' || c > '7') {
- throw new CorruptObjectException(
- JGitText.get().corruptObjectInvalidModeChar);
- }
- if (thisMode == 0 && c == '0') { // a zero digit while the accumulated mode is still zero
- report(ZERO_PADDED_FILEMODE, id,
- JGitText.get().corruptObjectInvalidModeStartsZero);
- }
- thisMode <<= 3;
- thisMode += c - '0';
- }
-
- if (FileMode.fromBits(thisMode).getObjectType() == OBJ_BAD) {
- throw new CorruptObjectException(MessageFormat.format(
- JGitText.get().corruptObjectInvalidMode2,
- Integer.valueOf(thisMode)));
- }
-
- final int thisNameB = ptr;
- ptr = scanPathSegment(raw, ptr, sz, id);
- if (ptr == sz || raw[ptr] != 0) { // the name must be terminated by a NUL inside the buffer
- throw new CorruptObjectException(
- JGitText.get().corruptObjectTruncatedInName);
- }
- checkPathSegment2(raw, thisNameB, ptr, id);
- if (normalized != null) {
- if (!normalized.add(normalize(raw, thisNameB, ptr))) { // add() returns false when the normalized name was already seen
- report(DUPLICATE_ENTRIES, id,
- JGitText.get().corruptObjectDuplicateEntryNames);
- }
- } else if (duplicateName(raw, thisNameB, ptr)) {
- report(DUPLICATE_ENTRIES, id,
- JGitText.get().corruptObjectDuplicateEntryNames);
- }
-
- if (lastNameB != 0) { // 0 means no previous entry: a name never starts at offset 0 because the mode precedes it
- int cmp = compare(
- raw, lastNameB, lastNameE, lastMode,
- raw, thisNameB, ptr, thisMode);
- if (cmp > 0) {
- report(TREE_NOT_SORTED, id,
- JGitText.get().corruptObjectIncorrectSorting);
- }
- }
-
- lastNameB = thisNameB;
- lastNameE = ptr;
- lastMode = thisMode;
-
- ptr += 1 + OBJECT_ID_LENGTH; // skip the NUL terminator and the raw object id
- if (ptr > sz) {
- throw new CorruptObjectException(
- JGitText.get().corruptObjectTruncatedInObjectId);
- }
-
- if (ObjectId.zeroId().compareTo(raw, ptr - OBJECT_ID_LENGTH) == 0) {
- report(NULL_SHA1, id, JGitText.get().corruptObjectZeroId);
- }
-
- if (id != null && isGitmodules(raw, lastNameB, lastNameE, id)) {
- ObjectId blob = ObjectId.fromRaw(raw, ptr - OBJECT_ID_LENGTH); // the id just skipped is the blob of this entry
- gitsubmodules.add(new GitmoduleEntry(id, blob));
- }
- }
- }
-
- private int scanPathSegment(byte[] raw, int ptr, int end,
- @Nullable AnyObjectId id) throws CorruptObjectException {
- for (; ptr < end; ptr++) {
- byte c = raw[ptr];
- if (c == 0) {
- return ptr;
- }
- if (c == '/') {
- report(FULL_PATHNAME, id,
- JGitText.get().corruptObjectNameContainsSlash);
- }
- if (windows && isInvalidOnWindows(c)) {
- if (c > 31) {
- throw new CorruptObjectException(String.format(
- JGitText.get().corruptObjectNameContainsChar,
- Byte.valueOf(c)));
- }
- throw new CorruptObjectException(String.format(
- JGitText.get().corruptObjectNameContainsByte,
- Integer.valueOf(c & 0xff)));
- }
- }
- return ptr;
- }
-
- /**
-  * Compute the id of an object, but only when it is needed for skip list
-  * lookups.
-  *
-  * @param objType
-  *            type code of the object.
-  * @param raw
-  *            canonical content of the object.
-  * @return the object's id, or {@code null} when no skip list is
-  *         configured.
-  */
- @Nullable
- private ObjectId idFor(int objType, byte[] raw) {
-     if (skipList == null) {
-         return null;
-     }
-     try (ObjectInserter.Formatter formatter = new ObjectInserter.Formatter()) {
-         return formatter.idFor(objType, raw);
-     }
- }
-
- /**
-  * Raise an error unless its type is ignored or the object is on the skip
-  * list.
-  *
-  * @param err
-  *            type of the problem found.
-  * @param id
-  *            identity of the object being checked; may be {@code null},
-  *            in which case the skip list is not consulted.
-  * @param why
-  *            message describing the problem.
-  * @throws CorruptObjectException
-  *             if the error must be reported.
-  */
- private void report(@NonNull ErrorType err, @Nullable AnyObjectId id,
-         String why) throws CorruptObjectException {
-     if (!errors.contains(err)) {
-         return;
-     }
-     if (id != null && skipList != null && skipList.contains(id)) {
-         return;
-     }
-     if (id == null) {
-         throw new CorruptObjectException(why);
-     }
-     throw new CorruptObjectException(err, id, why);
- }
-
- /**
- * Check tree path entry for validity.
- * <p>
- * Unlike {@link #checkPathSegment(byte[], int, int)}, this version scans a
- * multi-directory path string such as {@code "src/main.c"}.
- * <p>
- * The string is converted to bytes with {@code Constants.encode} and the
- * whole resulting buffer is checked.
- *
- * @param path
- * path string to scan.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * path is invalid.
- * @since 3.6
- */
- public void checkPath(String path) throws CorruptObjectException {
- byte[] buf = Constants.encode(path);
- checkPath(buf, 0, buf.length);
- }
-
- /**
-  * Check tree path entry for validity.
-  * <p>
-  * Unlike {@link #checkPathSegment(byte[], int, int)}, this version scans a
-  * multi-directory path string such as {@code "src/main.c"}. The range is
-  * split at every {@code '/'} and each piece, including the one after the
-  * last separator, is checked as a single segment.
-  *
-  * @param raw
-  *            buffer to scan.
-  * @param ptr
-  *            offset to first byte of the name.
-  * @param end
-  *            offset to one past last byte of name.
-  * @throws org.eclipse.jgit.errors.CorruptObjectException
-  *             path is invalid.
-  * @since 3.6
-  */
- public void checkPath(byte[] raw, int ptr, int end)
-         throws CorruptObjectException {
-     int segmentStart = ptr;
-     for (int i = ptr; i < end; i++) {
-         if (raw[i] != '/') {
-             continue;
-         }
-         checkPathSegment(raw, segmentStart, i);
-         segmentStart = i + 1;
-     }
-     // Trailing segment after the last separator (or the whole range).
-     checkPathSegment(raw, segmentStart, end);
- }
-
- /**
- * Check tree path entry for validity.
- * <p>
- * The segment must not contain a NUL byte. The check runs without an
- * object id, so a configured skip list has no effect on it.
- *
- * @param raw
- * buffer to scan.
- * @param ptr
- * offset to first byte of the name.
- * @param end
- * offset to one past last byte of name.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * name is invalid.
- * @since 3.4
- */
- public void checkPathSegment(byte[] raw, int ptr, int end)
- throws CorruptObjectException {
- int e = scanPathSegment(raw, ptr, end, null);
- if (e < end && raw[e] == 0) // the scan stopped early on a NUL inside the segment
- throw new CorruptObjectException(
- JGitText.get().corruptObjectNameContainsNullByte);
- checkPathSegment2(raw, ptr, end, null);
- }
-
- private void checkPathSegment2(byte[] raw, int ptr, int end,
- @Nullable AnyObjectId id) throws CorruptObjectException {
- if (ptr == end) {
- report(EMPTY_NAME, id, JGitText.get().corruptObjectNameZeroLength);
- return;
- }
-
- if (raw[ptr] == '.') {
- switch (end - ptr) {
- case 1:
- report(HAS_DOT, id, JGitText.get().corruptObjectNameDot);
- break;
- case 2:
- if (raw[ptr + 1] == '.') {
- report(HAS_DOTDOT, id,
- JGitText.get().corruptObjectNameDotDot);
- }
- break;
- case 4:
- if (isGit(raw, ptr + 1)) {
- report(HAS_DOTGIT, id, String.format(
- JGitText.get().corruptObjectInvalidName,
- RawParseUtils.decode(raw, ptr, end)));
- }
- break;
- default:
- if (end - ptr > 4 && isNormalizedGit(raw, ptr + 1, end)) {
- report(HAS_DOTGIT, id, String.format(
- JGitText.get().corruptObjectInvalidName,
- RawParseUtils.decode(raw, ptr, end)));
- }
- }
- } else if (isGitTilde1(raw, ptr, end)) {
- report(HAS_DOTGIT, id, String.format(
- JGitText.get().corruptObjectInvalidName,
- RawParseUtils.decode(raw, ptr, end)));
- }
- if (macosx && isMacHFSGit(raw, ptr, end, id)) {
- report(HAS_DOTGIT, id, String.format(
- JGitText.get().corruptObjectInvalidNameIgnorableUnicode,
- RawParseUtils.decode(raw, ptr, end)));
- }
-
- if (windows) {
- // Windows ignores space and dot at end of file name.
- if (raw[end - 1] == ' ' || raw[end - 1] == '.') {
- report(WIN32_BAD_NAME, id, String.format(
- JGitText.get().corruptObjectInvalidNameEnd,
- Character.valueOf(((char) raw[end - 1]))));
- }
- if (end - ptr >= 3) {
- checkNotWindowsDevice(raw, ptr, end, id);
- }
- }
- }
-
- // Mac's HFS+ folds permutations of ".git" and Unicode ignorable characters
- // to ".git" therefore we should prevent such names. Returns true only when the name equals path (compared via toLower) once the ignorable characters listed below are removed AND at least one such character was present.
- private boolean isMacHFSPath(byte[] raw, int ptr, int end, byte[] path,
- @Nullable AnyObjectId id) throws CorruptObjectException {
- boolean ignorable = false; // set once at least one ignorable character has been skipped
- int g = 0; // index of the next byte of path to be matched
- while (ptr < end) {
- switch (raw[ptr]) {
- case (byte) 0xe2: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192
- if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) { // fewer than two bytes follow the lead byte
- return false;
- }
- switch (raw[ptr + 1]) {
- case (byte) 0x80:
- switch (raw[ptr + 2]) {
- case (byte) 0x8c: // U+200C 0xe2808c ZERO WIDTH NON-JOINER
- case (byte) 0x8d: // U+200D 0xe2808d ZERO WIDTH JOINER
- case (byte) 0x8e: // U+200E 0xe2808e LEFT-TO-RIGHT MARK
- case (byte) 0x8f: // U+200F 0xe2808f RIGHT-TO-LEFT MARK
- case (byte) 0xaa: // U+202A 0xe280aa LEFT-TO-RIGHT EMBEDDING
- case (byte) 0xab: // U+202B 0xe280ab RIGHT-TO-LEFT EMBEDDING
- case (byte) 0xac: // U+202C 0xe280ac POP DIRECTIONAL FORMATTING
- case (byte) 0xad: // U+202D 0xe280ad LEFT-TO-RIGHT OVERRIDE
- case (byte) 0xae: // U+202E 0xe280ae RIGHT-TO-LEFT OVERRIDE
- ignorable = true;
- ptr += 3; // skip the whole three byte sequence
- continue;
- default:
- return false;
- }
- case (byte) 0x81:
- switch (raw[ptr + 2]) {
- case (byte) 0xaa: // U+206A 0xe281aa INHIBIT SYMMETRIC SWAPPING
- case (byte) 0xab: // U+206B 0xe281ab ACTIVATE SYMMETRIC SWAPPING
- case (byte) 0xac: // U+206C 0xe281ac INHIBIT ARABIC FORM SHAPING
- case (byte) 0xad: // U+206D 0xe281ad ACTIVATE ARABIC FORM SHAPING
- case (byte) 0xae: // U+206E 0xe281ae NATIONAL DIGIT SHAPES
- case (byte) 0xaf: // U+206F 0xe281af NOMINAL DIGIT SHAPES
- ignorable = true;
- ptr += 3; // skip the whole three byte sequence
- continue;
- default:
- return false;
- }
- default:
- return false;
- }
- case (byte) 0xef: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=65024
- if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) { // fewer than two bytes follow the lead byte
- return false;
- }
- // U+FEFF 0xefbbbf ZERO WIDTH NO-BREAK SPACE
- if ((raw[ptr + 1] == (byte) 0xbb)
- && (raw[ptr + 2] == (byte) 0xbf)) {
- ignorable = true;
- ptr += 3;
- continue;
- }
- return false;
- default:
- if (g == path.length) { // name has more bytes than path
- return false;
- }
- if (toLower(raw[ptr++]) != path[g++]) {
- return false;
- }
- }
- }
- if (g == path.length && ignorable) { // a name without any ignorable character is not flagged by this method
- return true;
- }
- return false;
- }
-
- private boolean isMacHFSGit(byte[] raw, int ptr, int end,
- @Nullable AnyObjectId id) throws CorruptObjectException {
- byte[] git = new byte[] { '.', 'g', 'i', 't' };
- return isMacHFSPath(raw, ptr, end, git, id);
- }
-
- private boolean isMacHFSGitmodules(byte[] raw, int ptr, int end,
- @Nullable AnyObjectId id) throws CorruptObjectException {
- return isMacHFSPath(raw, ptr, end, dotGitmodules, id);
- }
-
- private boolean checkTruncatedIgnorableUTF8(byte[] raw, int ptr, int end,
- @Nullable AnyObjectId id) throws CorruptObjectException {
- if ((ptr + 2) >= end) {
- report(BAD_UTF8, id, MessageFormat.format(
- JGitText.get().corruptObjectInvalidNameInvalidUtf8,
- toHexString(raw, ptr, end)));
- return false;
- }
- return true;
- }
-
- private static String toHexString(byte[] raw, int ptr, int end) {
- StringBuilder b = new StringBuilder("0x"); //$NON-NLS-1$
- for (int i = ptr; i < end; i++)
- b.append(String.format("%02x", Byte.valueOf(raw[i]))); //$NON-NLS-1$
- return b.toString();
- }
-
- private void checkNotWindowsDevice(byte[] raw, int ptr, int end,
- @Nullable AnyObjectId id) throws CorruptObjectException {
- switch (toLower(raw[ptr])) {
- case 'a': // AUX
- if (end - ptr >= 3
- && toLower(raw[ptr + 1]) == 'u'
- && toLower(raw[ptr + 2]) == 'x'
- && (end - ptr == 3 || raw[ptr + 3] == '.')) {
- report(WIN32_BAD_NAME, id,
- JGitText.get().corruptObjectInvalidNameAux);
- }
- break;
-
- case 'c': // CON, COM[1-9]
- if (end - ptr >= 3
- && toLower(raw[ptr + 2]) == 'n'
- && toLower(raw[ptr + 1]) == 'o'
- && (end - ptr == 3 || raw[ptr + 3] == '.')) {
- report(WIN32_BAD_NAME, id,
- JGitText.get().corruptObjectInvalidNameCon);
- }
- if (end - ptr >= 4
- && toLower(raw[ptr + 2]) == 'm'
- && toLower(raw[ptr + 1]) == 'o'
- && isPositiveDigit(raw[ptr + 3])
- && (end - ptr == 4 || raw[ptr + 4] == '.')) {
- report(WIN32_BAD_NAME, id, String.format(
- JGitText.get().corruptObjectInvalidNameCom,
- Character.valueOf(((char) raw[ptr + 3]))));
- }
- break;
-
- case 'l': // LPT[1-9]
- if (end - ptr >= 4
- && toLower(raw[ptr + 1]) == 'p'
- && toLower(raw[ptr + 2]) == 't'
- && isPositiveDigit(raw[ptr + 3])
- && (end - ptr == 4 || raw[ptr + 4] == '.')) {
- report(WIN32_BAD_NAME, id, String.format(
- JGitText.get().corruptObjectInvalidNameLpt,
- Character.valueOf(((char) raw[ptr + 3]))));
- }
- break;
-
- case 'n': // NUL
- if (end - ptr >= 3
- && toLower(raw[ptr + 1]) == 'u'
- && toLower(raw[ptr + 2]) == 'l'
- && (end - ptr == 3 || raw[ptr + 3] == '.')) {
- report(WIN32_BAD_NAME, id,
- JGitText.get().corruptObjectInvalidNameNul);
- }
- break;
-
- case 'p': // PRN
- if (end - ptr >= 3
- && toLower(raw[ptr + 1]) == 'r'
- && toLower(raw[ptr + 2]) == 'n'
- && (end - ptr == 3 || raw[ptr + 3] == '.')) {
- report(WIN32_BAD_NAME, id,
- JGitText.get().corruptObjectInvalidNamePrn);
- }
- break;
- }
- }
-
- private static boolean isInvalidOnWindows(byte c) {
- // Windows disallows "special" characters in a path component.
- switch (c) {
- case '"':
- case '*':
- case ':':
- case '<':
- case '>':
- case '?':
- case '\\':
- case '|':
- return true;
- }
- return 1 <= c && c <= 31;
- }
-
- private static boolean isGit(byte[] buf, int p) {
- return toLower(buf[p]) == 'g'
- && toLower(buf[p + 1]) == 'i'
- && toLower(buf[p + 2]) == 't';
- }
-
- /**
- * Check if the filename contained in buf[start:end] could be read as a
- * .gitmodules file when checked out to the working directory.
- *
- * This ought to be a simple comparison, but some filesystems have peculiar
- * rules for normalizing filenames:
- *
- * NTFS has backward-compatibility support for 8.3 synonyms of long file
- * names (see
- * https://web.archive.org/web/20160318181041/https://usn.pw/blog/gen/2015/06/09/filenames/
- * for details). NTFS is also case-insensitive.
- *
- * MacOS's HFS+ folds away ignorable Unicode characters in addition to case
- * folding.
- *
- * @param buf
- * byte array to decode
- * @param start
- * position where a supposed filename is starting
- * @param end
- * position where a supposed filename is ending
- * @param id
- * object id for error reporting
- *
- * @return true if the filename in buf could be a ".gitmodules" file
- * @throws CorruptObjectException
- */
- private boolean isGitmodules(byte[] buf, int start, int end, @Nullable AnyObjectId id)
- throws CorruptObjectException {
- // Simple cases first.
- if (end - start < 8) {
- return false;
- }
- return (end - start == dotGitmodules.length
- && RawParseUtils.match(buf, start, dotGitmodules) != -1)
- || (macosx && isMacHFSGitmodules(buf, start, end, id))
- || (windows && isNTFSGitmodules(buf, start, end));
- }
-
- private boolean matchLowerCase(byte[] b, int ptr, byte[] src) {
- if (ptr + src.length > b.length) {
- return false;
- }
- for (int i = 0; i < src.length; i++, ptr++) {
- if (toLower(b[ptr]) != src[i]) {
- return false;
- }
- }
- return true;
- }
-
- // .gitmodules, case-insensitive, or an 8.3 abbreviation of the same.
- private boolean isNTFSGitmodules(byte[] buf, int start, int end) {
- if (end - start == 11) {
- return matchLowerCase(buf, start, dotGitmodules);
- }
-
- if (end - start != 8) {
- return false;
- }
-
- // "gitmod" or a prefix of "gi7eba", followed by...
- byte[] gitmod = new byte[]{'g', 'i', 't', 'm', 'o', 'd', '~'};
- if (matchLowerCase(buf, start, gitmod)) {
- start += 6;
- } else {
- byte[] gi7eba = new byte[]{'g', 'i', '7', 'e', 'b', 'a'};
- for (int i = 0; i < gi7eba.length; i++, start++) {
- byte c = (byte) toLower(buf[start]);
- if (c == '~') {
- break;
- }
- if (c != gi7eba[i]) {
- return false;
- }
- }
- }
-
- // ... ~ and a number
- if (end - start < 2) {
- return false;
- }
- if (buf[start] != '~') {
- return false;
- }
- start++;
- if (buf[start] < '1' || buf[start] > '9') {
- return false;
- }
- start++;
- for (; start != end; start++) {
- if (buf[start] < '0' || buf[start] > '9') {
- return false;
- }
- }
- return true;
- }
-
- private static boolean isGitTilde1(byte[] buf, int p, int end) {
- if (end - p != 5)
- return false;
- return toLower(buf[p]) == 'g' && toLower(buf[p + 1]) == 'i'
- && toLower(buf[p + 2]) == 't' && buf[p + 3] == '~'
- && buf[p + 4] == '1';
- }
-
- private static boolean isNormalizedGit(byte[] raw, int ptr, int end) {
- if (isGit(raw, ptr)) {
- int dots = 0;
- boolean space = false;
- int p = end - 1;
- for (; (ptr + 2) < p; p--) {
- if (raw[p] == '.')
- dots++;
- else if (raw[p] == ' ')
- space = true;
- else
- break;
- }
- return p == ptr + 2 && (dots == 1 || space);
- }
- return false;
- }
-
- private boolean match(byte[] b, byte[] src) {
- int r = RawParseUtils.match(b, bufPtr.value, src);
- if (r < 0) {
- return false;
- }
- bufPtr.value = r;
- return true;
- }
-
- private static char toLower(byte b) {
- if ('A' <= b && b <= 'Z')
- return (char) (b + ('a' - 'A'));
- return (char) b;
- }
-
- private static boolean isPositiveDigit(byte b) {
- return '1' <= b && b <= '9';
- }
-
- /**
- * Create a new {@link org.eclipse.jgit.lib.BlobObjectChecker}.
- *
- * @return new BlobObjectChecker or null if it's not provided.
- * @since 4.9
- */
- @Nullable
- public BlobObjectChecker newBlobObjectChecker() {
- return null;
- }
-
- /**
- * Check a blob for errors.
- *
- * <p>
- * This may not be called from PackParser in some cases. Use
- * {@link #newBlobObjectChecker} instead.
- *
- * @param raw
- * the blob data. The array is never modified.
- * @throws org.eclipse.jgit.errors.CorruptObjectException
- * if any error was detected.
- */
- public void checkBlob(byte[] raw) throws CorruptObjectException {
- // We can always assume the blob is valid.
- }
-
- private String normalize(byte[] raw, int ptr, int end) {
- String n = RawParseUtils.decode(raw, ptr, end).toLowerCase(Locale.US);
- return macosx ? Normalizer.normalize(n, Normalizer.Form.NFC) : n;
- }
-
- /**
- * Get the list of".gitmodules" files found in the pack. For each, report
- * its blob id (e.g. to validate its contents) and the tree where it was
- * found (e.g. to check if it is in the root)
- *
- * @return List of pairs of ids <tree, blob>
- *
- * @since 4.7.5
- */
- public List<GitmoduleEntry> getGitsubmodules() {
- return gitsubmodules;
- }
- }
|