You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ObjectChecker.java 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099
  1. /*
  2. * Copyright (C) 2008-2010, Google Inc.
  3. * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.lib;
  45. import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
  46. import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH;
  47. import static org.eclipse.jgit.lib.Constants.OBJ_BAD;
  48. import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
  49. import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
  50. import static org.eclipse.jgit.lib.Constants.OBJ_TAG;
  51. import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
  52. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_DATE;
  53. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_EMAIL;
  54. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_OBJECT_SHA1;
  55. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_PARENT_SHA1;
  56. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TIMEZONE;
  57. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TREE_SHA1;
  58. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_UTF8;
  59. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.DUPLICATE_ENTRIES;
  60. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.EMPTY_NAME;
  61. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.FULL_PATHNAME;
  62. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOT;
  63. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTDOT;
  64. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTGIT;
  65. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_AUTHOR;
  66. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_COMMITTER;
  67. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_EMAIL;
  68. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_OBJECT;
  69. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_SPACE_BEFORE_DATE;
  70. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TAG_ENTRY;
  71. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TREE;
  72. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TYPE_ENTRY;
  73. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.NULL_SHA1;
  74. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.TREE_NOT_SORTED;
  75. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.UNKNOWN_TYPE;
  76. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.WIN32_BAD_NAME;
  77. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.ZERO_PADDED_FILEMODE;
  78. import static org.eclipse.jgit.util.RawParseUtils.nextLF;
  79. import static org.eclipse.jgit.util.RawParseUtils.parseBase10;
  80. import java.text.MessageFormat;
  81. import java.text.Normalizer;
  82. import java.util.EnumSet;
  83. import java.util.HashSet;
  84. import java.util.Locale;
  85. import java.util.Set;
  86. import org.eclipse.jgit.annotations.NonNull;
  87. import org.eclipse.jgit.annotations.Nullable;
  88. import org.eclipse.jgit.errors.CorruptObjectException;
  89. import org.eclipse.jgit.internal.JGitText;
  90. import org.eclipse.jgit.util.MutableInteger;
  91. import org.eclipse.jgit.util.RawParseUtils;
  92. import org.eclipse.jgit.util.StringUtils;
  93. /**
  94. * Verifies that an object is formatted correctly.
  95. * <p>
  96. * Verifications made by this class only check that the fields of an object are
  97. * formatted correctly. The ObjectId checksum of the object is not verified, and
  98. * connectivity links between objects are also not verified. It is assumed that
  99. * the caller can provide both of these validations on its own.
  100. * <p>
  101. * Instances of this class are not thread safe, but they may be reused to
  102. * perform multiple object validations.
  103. */
  104. public class ObjectChecker {
  105. /** ASCII bytes of the header prefix "tree " (trailing space included); matched first by checkCommit. */
  106. public static final byte[] tree = Constants.encodeASCII("tree "); //$NON-NLS-1$
  107. /** ASCII bytes of the header prefix "parent " (trailing space included); matched repeatedly by checkCommit. */
  108. public static final byte[] parent = Constants.encodeASCII("parent "); //$NON-NLS-1$
  109. /** ASCII bytes of the header prefix "author " (trailing space included); matched by checkCommit. */
  110. public static final byte[] author = Constants.encodeASCII("author "); //$NON-NLS-1$
  111. /** ASCII bytes of the header prefix "committer " (trailing space included); matched by checkCommit. */
  112. public static final byte[] committer = Constants.encodeASCII("committer "); //$NON-NLS-1$
  113. /** ASCII bytes of the header prefix "encoding " (trailing space included). */
  114. public static final byte[] encoding = Constants.encodeASCII("encoding "); //$NON-NLS-1$
  115. /** ASCII bytes of the header prefix "object " (trailing space included); matched first by checkTag. */
  116. public static final byte[] object = Constants.encodeASCII("object "); //$NON-NLS-1$
  117. /** ASCII bytes of the header prefix "type " (trailing space included); matched by checkTag. */
  118. public static final byte[] type = Constants.encodeASCII("type "); //$NON-NLS-1$
  119. /** ASCII bytes of the header prefix "tag " (trailing space included); matched by checkTag. */
  120. public static final byte[] tag = Constants.encodeASCII("tag "); //$NON-NLS-1$
  121. /** ASCII bytes of the header prefix "tagger " (trailing space included); optional header matched by checkTag. */
  122. public static final byte[] tagger = Constants.encodeASCII("tagger "); //$NON-NLS-1$
  123. /**
  124. * Potential issues identified by the checker.
  125. *
  126. * @since 4.2
  127. */
  128. public enum ErrorType {
  129. // @formatter:off
  130. // These names match git-core so that fsck section keys also match.
  131. /***/ NULL_SHA1,
  132. /***/ DUPLICATE_ENTRIES,
  133. /***/ TREE_NOT_SORTED,
  134. /***/ ZERO_PADDED_FILEMODE,
  135. /***/ EMPTY_NAME,
  136. /***/ FULL_PATHNAME,
  137. /***/ HAS_DOT,
  138. /***/ HAS_DOTDOT,
  139. /***/ HAS_DOTGIT,
  140. /***/ BAD_OBJECT_SHA1,
  141. /***/ BAD_PARENT_SHA1,
  142. /***/ BAD_TREE_SHA1,
  143. /***/ MISSING_AUTHOR,
  144. /***/ MISSING_COMMITTER,
  145. /***/ MISSING_OBJECT,
  146. /***/ MISSING_TREE,
  147. /***/ MISSING_TYPE_ENTRY,
  148. /***/ MISSING_TAG_ENTRY,
  149. /***/ BAD_DATE,
  150. /***/ BAD_EMAIL,
  151. /***/ BAD_TIMEZONE,
  152. /***/ MISSING_EMAIL,
  153. /***/ MISSING_SPACE_BEFORE_DATE,
  154. /***/ UNKNOWN_TYPE,
  155. // These are unique to JGit.
  156. /***/ WIN32_BAD_NAME,
  157. /***/ BAD_UTF8;
  158. // @formatter:on
  159. /** @return camelCaseVersion of the name. */
  160. public String getMessageId() {
  161. String n = name();
  162. StringBuilder r = new StringBuilder(n.length());
  163. for (int i = 0; i < n.length(); i++) {
  164. char c = n.charAt(i);
  165. if (c != '_') {
  166. r.append(StringUtils.toLowerCase(c));
  167. } else {
  168. r.append(n.charAt(++i));
  169. }
  170. }
  171. return r.toString();
  172. }
  173. }
  // Scratch object id that checkId parses hex object names into.
  174. private final MutableObjectId tempId = new MutableObjectId();
  // Current parse position within the raw buffer of the commit/tag being checked.
  175. private final MutableInteger bufPtr = new MutableInteger();
  // Error types that are reported; types removed from this set are ignored.
  176. private EnumSet<ErrorType> errors = EnumSet.allOf(ErrorType.class);
  // Optional set of object ids for which detected errors are not reported.
  177. private ObjectIdSet skipList;
  // When true, checkPersonIdent skips validation of identity lines.
  178. private boolean allowInvalidPersonIdent;
  // When true, tree entry names are additionally checked against Windows rules.
  179. private boolean windows;
  // When true, tree entry names are additionally checked against Mac OS X (HFS+) rules.
  180. private boolean macosx;
  181. /**
  182. * Enable accepting specific malformed (but not horribly broken) objects.
  183. *
  184. * @param objects
  185. * collection of object names known to be broken in a non-fatal
  186. * way that should be ignored by the checker.
  187. * @return {@code this}
  188. * @since 4.2
  189. */
  190. public ObjectChecker setSkipList(@Nullable ObjectIdSet objects) {
  191. skipList = objects;
  192. return this;
  193. }
  194. /**
  195. * Configure error types to be ignored across all objects.
  196. *
  197. * @param ids
  198. * error types to ignore. The caller's set is copied.
  199. * @return {@code this}
  200. * @since 4.2
  201. */
  202. public ObjectChecker setIgnore(@Nullable Set<ErrorType> ids) {
  203. errors = EnumSet.allOf(ErrorType.class);
  204. if (ids != null) {
  205. errors.removeAll(ids);
  206. }
  207. return this;
  208. }
  209. /**
  210. * Add message type to be ignored across all objects.
  211. *
  212. * @param id
  213. * error type to ignore.
  214. * @param ignore
  215. * true to ignore this error; false to treat the error as an
  216. * error and throw.
  217. * @return {@code this}
  218. * @since 4.2
  219. */
  220. public ObjectChecker setIgnore(ErrorType id, boolean ignore) {
  221. if (ignore) {
  222. errors.remove(id);
  223. } else {
  224. errors.add(id);
  225. }
  226. return this;
  227. }
  228. /**
  229. * Enable accepting leading zero mode in tree entries.
  230. * <p>
  231. * Some broken Git libraries generated leading zeros in the mode part of
  232. * tree entries. This is technically incorrect but gracefully allowed by
  233. * git-core. JGit rejects such trees by default, but may need to accept
  234. * them on broken histories.
  235. * <p>
  236. * Same as {@code setIgnore(ZERO_PADDED_FILEMODE, allow)}.
  237. *
  238. * @param allow allow leading zero mode.
  239. * @return {@code this}.
  240. * @since 3.4
  241. */
  242. public ObjectChecker setAllowLeadingZeroFileMode(boolean allow) {
  243. return setIgnore(ZERO_PADDED_FILEMODE, allow);
  244. }
  245. /**
  246. * Enable accepting invalid author, committer and tagger identities.
  247. * <p>
  248. * Some broken Git versions/libraries allowed users to create commits and
  249. * tags with invalid formatting between the name, email and timestamp.
  250. *
  251. * @param allow
  252. * if true accept invalid person identity strings.
  253. * @return {@code this}.
  254. * @since 4.0
  255. */
  256. public ObjectChecker setAllowInvalidPersonIdent(boolean allow) {
  257. allowInvalidPersonIdent = allow;
  258. return this;
  259. }
  260. /**
  261. * Restrict trees to only names legal on Windows platforms.
  262. * <p>
  263. * Also rejects any mixed case forms of reserved names ({@code .git}).
  264. *
  265. * @param win true if Windows name checking should be performed.
  266. * @return {@code this}.
  267. * @since 3.4
  268. */
  269. public ObjectChecker setSafeForWindows(boolean win) {
  270. windows = win;
  271. return this;
  272. }
  273. /**
  274. * Restrict trees to only names legal on Mac OS X platforms.
  275. * <p>
  276. * Rejects any mixed case forms of reserved names ({@code .git})
  277. * for users working on HFS+ in case-insensitive (default) mode.
  278. *
  279. * @param mac true if Mac OS X name checking should be performed.
  280. * @return {@code this}.
  281. * @since 3.4
  282. */
  283. public ObjectChecker setSafeForMacOS(boolean mac) {
  284. macosx = mac;
  285. return this;
  286. }
  287. /**
  288. * Check an object for parsing errors.
  289. *
  290. * @param objType
  291. * type of the object. Must be a valid object type code in
  292. * {@link Constants}.
  293. * @param raw
  294. * the raw data which comprises the object. This should be in the
  295. * canonical format (that is the format used to generate the
  296. * ObjectId of the object). The array is never modified.
  297. * @throws CorruptObjectException
  298. * if an error is identified.
  299. */
  300. public void check(int objType, byte[] raw)
  301. throws CorruptObjectException {
  302. check(idFor(objType, raw), objType, raw);
  303. }
  304. /**
  305. * Check an object for parsing errors.
  306. *
  307. * @param id
  308. * identify of the object being checked.
  309. * @param objType
  310. * type of the object. Must be a valid object type code in
  311. * {@link Constants}.
  312. * @param raw
  313. * the raw data which comprises the object. This should be in the
  314. * canonical format (that is the format used to generate the
  315. * ObjectId of the object). The array is never modified.
  316. * @throws CorruptObjectException
  317. * if an error is identified.
  318. * @since 4.2
  319. */
  320. public void check(@Nullable AnyObjectId id, int objType, byte[] raw)
  321. throws CorruptObjectException {
  322. switch (objType) {
  323. case OBJ_COMMIT:
  324. checkCommit(id, raw);
  325. break;
  326. case OBJ_TAG:
  327. checkTag(id, raw);
  328. break;
  329. case OBJ_TREE:
  330. checkTree(id, raw);
  331. break;
  332. case OBJ_BLOB:
  333. checkBlob(raw);
  334. break;
  335. default:
  336. report(UNKNOWN_TYPE, id, MessageFormat.format(
  337. JGitText.get().corruptObjectInvalidType2,
  338. Integer.valueOf(objType)));
  339. }
  340. }
  341. private boolean checkId(byte[] raw) {
  342. int p = bufPtr.value;
  343. try {
  344. tempId.fromString(raw, p);
  345. } catch (IllegalArgumentException e) {
  346. bufPtr.value = nextLF(raw, p);
  347. return false;
  348. }
  349. p += OBJECT_ID_STRING_LENGTH;
  350. if (raw[p] == '\n') {
  351. bufPtr.value = p + 1;
  352. return true;
  353. }
  354. bufPtr.value = nextLF(raw, p);
  355. return false;
  356. }
  357. private void checkPersonIdent(byte[] raw, @Nullable AnyObjectId id)
  358. throws CorruptObjectException {
  359. if (allowInvalidPersonIdent) {
  360. bufPtr.value = nextLF(raw, bufPtr.value);
  361. return;
  362. }
  363. final int emailB = nextLF(raw, bufPtr.value, '<');
  364. if (emailB == bufPtr.value || raw[emailB - 1] != '<') {
  365. report(MISSING_EMAIL, id, JGitText.get().corruptObjectMissingEmail);
  366. bufPtr.value = nextLF(raw, bufPtr.value);
  367. return;
  368. }
  369. final int emailE = nextLF(raw, emailB, '>');
  370. if (emailE == emailB || raw[emailE - 1] != '>') {
  371. report(BAD_EMAIL, id, JGitText.get().corruptObjectBadEmail);
  372. bufPtr.value = nextLF(raw, bufPtr.value);
  373. return;
  374. }
  375. if (emailE == raw.length || raw[emailE] != ' ') {
  376. report(MISSING_SPACE_BEFORE_DATE, id,
  377. JGitText.get().corruptObjectBadDate);
  378. bufPtr.value = nextLF(raw, bufPtr.value);
  379. return;
  380. }
  381. parseBase10(raw, emailE + 1, bufPtr); // when
  382. if (emailE + 1 == bufPtr.value || bufPtr.value == raw.length
  383. || raw[bufPtr.value] != ' ') {
  384. report(BAD_DATE, id, JGitText.get().corruptObjectBadDate);
  385. bufPtr.value = nextLF(raw, bufPtr.value);
  386. return;
  387. }
  388. int p = bufPtr.value + 1;
  389. parseBase10(raw, p, bufPtr); // tz offset
  390. if (p == bufPtr.value) {
  391. report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
  392. bufPtr.value = nextLF(raw, bufPtr.value);
  393. return;
  394. }
  395. p = bufPtr.value;
  396. if (raw[p] == '\n') {
  397. bufPtr.value = p + 1;
  398. } else {
  399. report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
  400. bufPtr.value = nextLF(raw, p);
  401. }
  402. }
  403. /**
  404. * Check a commit for errors.
  405. *
  406. * @param raw
  407. * the commit data. The array is never modified.
  408. * @throws CorruptObjectException
  409. * if any error was detected.
  410. */
  411. public void checkCommit(byte[] raw) throws CorruptObjectException {
  412. checkCommit(idFor(OBJ_COMMIT, raw), raw);
  413. }
  414. /**
  415. * Check a commit for errors.
  416. *
  417. * @param id
  418. * identity of the object being checked.
  419. * @param raw
  420. * the commit data. The array is never modified.
  421. * @throws CorruptObjectException
  422. * if any error was detected.
  423. * @since 4.2
  424. */
  425. public void checkCommit(@Nullable AnyObjectId id, byte[] raw)
  426. throws CorruptObjectException {
  427. bufPtr.value = 0;
  428. if (!match(raw, tree)) {
  429. report(MISSING_TREE, id, JGitText.get().corruptObjectNotreeHeader);
  430. } else if (!checkId(raw)) {
  431. report(BAD_TREE_SHA1, id, JGitText.get().corruptObjectInvalidTree);
  432. }
  433. while (match(raw, parent)) {
  434. if (!checkId(raw)) {
  435. report(BAD_PARENT_SHA1, id,
  436. JGitText.get().corruptObjectInvalidParent);
  437. }
  438. }
  439. if (match(raw, author)) {
  440. checkPersonIdent(raw, id);
  441. } else {
  442. report(MISSING_AUTHOR, id, JGitText.get().corruptObjectNoAuthor);
  443. }
  444. if (match(raw, committer)) {
  445. checkPersonIdent(raw, id);
  446. } else {
  447. report(MISSING_COMMITTER, id,
  448. JGitText.get().corruptObjectNoCommitter);
  449. }
  450. }
  451. /**
  452. * Check an annotated tag for errors.
  453. *
  454. * @param raw
  455. * the tag data. The array is never modified.
  456. * @throws CorruptObjectException
  457. * if any error was detected.
  458. */
  459. public void checkTag(byte[] raw) throws CorruptObjectException {
  460. checkTag(idFor(OBJ_TAG, raw), raw);
  461. }
  462. /**
  463. * Check an annotated tag for errors.
  464. *
  465. * @param id
  466. * identity of the object being checked.
  467. * @param raw
  468. * the tag data. The array is never modified.
  469. * @throws CorruptObjectException
  470. * if any error was detected.
  471. * @since 4.2
  472. */
  473. public void checkTag(@Nullable AnyObjectId id, byte[] raw)
  474. throws CorruptObjectException {
  475. bufPtr.value = 0;
  476. if (!match(raw, object)) {
  477. report(MISSING_OBJECT, id,
  478. JGitText.get().corruptObjectNoObjectHeader);
  479. } else if (!checkId(raw)) {
  480. report(BAD_OBJECT_SHA1, id,
  481. JGitText.get().corruptObjectInvalidObject);
  482. }
  483. if (!match(raw, type)) {
  484. report(MISSING_TYPE_ENTRY, id,
  485. JGitText.get().corruptObjectNoTypeHeader);
  486. }
  487. bufPtr.value = nextLF(raw, bufPtr.value);
  488. if (!match(raw, tag)) {
  489. report(MISSING_TAG_ENTRY, id,
  490. JGitText.get().corruptObjectNoTagHeader);
  491. }
  492. bufPtr.value = nextLF(raw, bufPtr.value);
  493. if (match(raw, tagger)) {
  494. checkPersonIdent(raw, id);
  495. }
  496. }
  497. private static int lastPathChar(final int mode) {
  498. return FileMode.TREE.equals(mode) ? '/' : '\0';
  499. }
  500. private static int pathCompare(final byte[] raw, int aPos, final int aEnd,
  501. final int aMode, int bPos, final int bEnd, final int bMode) {
  502. while (aPos < aEnd && bPos < bEnd) {
  503. final int cmp = (raw[aPos++] & 0xff) - (raw[bPos++] & 0xff);
  504. if (cmp != 0)
  505. return cmp;
  506. }
  507. if (aPos < aEnd)
  508. return (raw[aPos] & 0xff) - lastPathChar(bMode);
  509. if (bPos < bEnd)
  510. return lastPathChar(aMode) - (raw[bPos] & 0xff);
  511. return 0;
  512. }
  513. private static boolean duplicateName(final byte[] raw,
  514. final int thisNamePos, final int thisNameEnd) {
  515. final int sz = raw.length;
  516. int nextPtr = thisNameEnd + 1 + Constants.OBJECT_ID_LENGTH;
  517. for (;;) {
  518. int nextMode = 0;
  519. for (;;) {
  520. if (nextPtr >= sz)
  521. return false;
  522. final byte c = raw[nextPtr++];
  523. if (' ' == c)
  524. break;
  525. nextMode <<= 3;
  526. nextMode += c - '0';
  527. }
  528. final int nextNamePos = nextPtr;
  529. for (;;) {
  530. if (nextPtr == sz)
  531. return false;
  532. final byte c = raw[nextPtr++];
  533. if (c == 0)
  534. break;
  535. }
  536. if (nextNamePos + 1 == nextPtr)
  537. return false;
  538. final int cmp = pathCompare(raw, thisNamePos, thisNameEnd,
  539. FileMode.TREE.getBits(), nextNamePos, nextPtr - 1, nextMode);
  540. if (cmp < 0)
  541. return false;
  542. else if (cmp == 0)
  543. return true;
  544. nextPtr += Constants.OBJECT_ID_LENGTH;
  545. }
  546. }
  547. /**
  548. * Check a canonical formatted tree for errors.
  549. *
  550. * @param raw
  551. * the raw tree data. The array is never modified.
  552. * @throws CorruptObjectException
  553. * if any error was detected.
  554. */
  555. public void checkTree(byte[] raw) throws CorruptObjectException {
  556. checkTree(idFor(OBJ_TREE, raw), raw);
  557. }
  558. /**
  559. * Check a canonical formatted tree for errors.
  560. *
  561. * @param id
  562. * identity of the object being checked.
  563. * @param raw
  564. * the raw tree data. The array is never modified.
  565. * @throws CorruptObjectException
  566. * if any error was detected.
  567. * @since 4.2
  568. */
  569. public void checkTree(@Nullable AnyObjectId id, byte[] raw)
  570. throws CorruptObjectException {
  571. final int sz = raw.length;
  572. int ptr = 0;
  573. int lastNameB = 0, lastNameE = 0, lastMode = 0;
  574. Set<String> normalized = windows || macosx
  575. ? new HashSet<String>()
  576. : null;
  577. while (ptr < sz) {
  578. int thisMode = 0;
  579. for (;;) {
  580. if (ptr == sz) {
  581. throw new CorruptObjectException(
  582. JGitText.get().corruptObjectTruncatedInMode);
  583. }
  584. final byte c = raw[ptr++];
  585. if (' ' == c)
  586. break;
  587. if (c < '0' || c > '7') {
  588. throw new CorruptObjectException(
  589. JGitText.get().corruptObjectInvalidModeChar);
  590. }
  591. if (thisMode == 0 && c == '0') {
  592. report(ZERO_PADDED_FILEMODE, id,
  593. JGitText.get().corruptObjectInvalidModeStartsZero);
  594. }
  595. thisMode <<= 3;
  596. thisMode += c - '0';
  597. }
  598. if (FileMode.fromBits(thisMode).getObjectType() == OBJ_BAD) {
  599. throw new CorruptObjectException(MessageFormat.format(
  600. JGitText.get().corruptObjectInvalidMode2,
  601. Integer.valueOf(thisMode)));
  602. }
  603. final int thisNameB = ptr;
  604. ptr = scanPathSegment(raw, ptr, sz, id);
  605. if (ptr == sz || raw[ptr] != 0) {
  606. throw new CorruptObjectException(
  607. JGitText.get().corruptObjectTruncatedInName);
  608. }
  609. checkPathSegment2(raw, thisNameB, ptr, id);
  610. if (normalized != null) {
  611. if (!normalized.add(normalize(raw, thisNameB, ptr))) {
  612. report(DUPLICATE_ENTRIES, id,
  613. JGitText.get().corruptObjectDuplicateEntryNames);
  614. }
  615. } else if (duplicateName(raw, thisNameB, ptr)) {
  616. report(DUPLICATE_ENTRIES, id,
  617. JGitText.get().corruptObjectDuplicateEntryNames);
  618. }
  619. if (lastNameB != 0) {
  620. final int cmp = pathCompare(raw, lastNameB, lastNameE,
  621. lastMode, thisNameB, ptr, thisMode);
  622. if (cmp > 0) {
  623. report(TREE_NOT_SORTED, id,
  624. JGitText.get().corruptObjectIncorrectSorting);
  625. }
  626. }
  627. lastNameB = thisNameB;
  628. lastNameE = ptr;
  629. lastMode = thisMode;
  630. ptr += 1 + OBJECT_ID_LENGTH;
  631. if (ptr > sz) {
  632. throw new CorruptObjectException(
  633. JGitText.get().corruptObjectTruncatedInObjectId);
  634. }
  635. if (ObjectId.zeroId().compareTo(raw, ptr - OBJECT_ID_LENGTH) == 0) {
  636. report(NULL_SHA1, id, JGitText.get().corruptObjectZeroId);
  637. }
  638. }
  639. }
  640. private int scanPathSegment(byte[] raw, int ptr, int end,
  641. @Nullable AnyObjectId id) throws CorruptObjectException {
  642. for (; ptr < end; ptr++) {
  643. byte c = raw[ptr];
  644. if (c == 0) {
  645. return ptr;
  646. }
  647. if (c == '/') {
  648. report(FULL_PATHNAME, id,
  649. JGitText.get().corruptObjectNameContainsSlash);
  650. }
  651. if (windows && isInvalidOnWindows(c)) {
  652. if (c > 31) {
  653. throw new CorruptObjectException(String.format(
  654. JGitText.get().corruptObjectNameContainsChar,
  655. Byte.valueOf(c)));
  656. }
  657. throw new CorruptObjectException(String.format(
  658. JGitText.get().corruptObjectNameContainsByte,
  659. Integer.valueOf(c & 0xff)));
  660. }
  661. }
  662. return ptr;
  663. }
  664. @SuppressWarnings("resource")
  665. @Nullable
  666. private ObjectId idFor(int objType, byte[] raw) {
  667. if (skipList != null) {
  668. return new ObjectInserter.Formatter().idFor(objType, raw);
  669. }
  670. return null;
  671. }
  672. private void report(@NonNull ErrorType err, @Nullable AnyObjectId id,
  673. String why) throws CorruptObjectException {
  674. if (errors.contains(err)
  675. && (id == null || skipList == null || !skipList.contains(id))) {
  676. if (id != null) {
  677. throw new CorruptObjectException(err, id, why);
  678. }
  679. throw new CorruptObjectException(why);
  680. }
  681. }
  682. /**
  683. * Check tree path entry for validity.
  684. * <p>
  685. * Unlike {@link #checkPathSegment(byte[], int, int)}, this version
  686. * scans a multi-directory path string such as {@code "src/main.c"}.
  687. *
  688. * @param path path string to scan.
  689. * @throws CorruptObjectException path is invalid.
  690. * @since 3.6
  691. */
  692. public void checkPath(String path) throws CorruptObjectException {
  693. byte[] buf = Constants.encode(path);
  694. checkPath(buf, 0, buf.length);
  695. }
  696. /**
  697. * Check tree path entry for validity.
  698. * <p>
  699. * Unlike {@link #checkPathSegment(byte[], int, int)}, this version
  700. * scans a multi-directory path string such as {@code "src/main.c"}.
  701. *
  702. * @param raw buffer to scan.
  703. * @param ptr offset to first byte of the name.
  704. * @param end offset to one past last byte of name.
  705. * @throws CorruptObjectException path is invalid.
  706. * @since 3.6
  707. */
  708. public void checkPath(byte[] raw, int ptr, int end)
  709. throws CorruptObjectException {
  710. int start = ptr;
  711. for (; ptr < end; ptr++) {
  712. if (raw[ptr] == '/') {
  713. checkPathSegment(raw, start, ptr);
  714. start = ptr + 1;
  715. }
  716. }
  717. checkPathSegment(raw, start, end);
  718. }
  719. /**
  720. * Check tree path entry for validity.
  721. *
  722. * @param raw buffer to scan.
  723. * @param ptr offset to first byte of the name.
  724. * @param end offset to one past last byte of name.
  725. * @throws CorruptObjectException name is invalid.
  726. * @since 3.4
  727. */
  728. public void checkPathSegment(byte[] raw, int ptr, int end)
  729. throws CorruptObjectException {
  730. int e = scanPathSegment(raw, ptr, end, null);
  731. if (e < end && raw[e] == 0)
  732. throw new CorruptObjectException(
  733. JGitText.get().corruptObjectNameContainsNullByte);
  734. checkPathSegment2(raw, ptr, end, null);
  735. }
  736. private void checkPathSegment2(byte[] raw, int ptr, int end,
  737. @Nullable AnyObjectId id) throws CorruptObjectException {
  738. if (ptr == end) {
  739. report(EMPTY_NAME, id, JGitText.get().corruptObjectNameZeroLength);
  740. return;
  741. }
  742. if (raw[ptr] == '.') {
  743. switch (end - ptr) {
  744. case 1:
  745. report(HAS_DOT, id, JGitText.get().corruptObjectNameDot);
  746. break;
  747. case 2:
  748. if (raw[ptr + 1] == '.') {
  749. report(HAS_DOTDOT, id,
  750. JGitText.get().corruptObjectNameDotDot);
  751. }
  752. break;
  753. case 4:
  754. if (isGit(raw, ptr + 1)) {
  755. report(HAS_DOTGIT, id, String.format(
  756. JGitText.get().corruptObjectInvalidName,
  757. RawParseUtils.decode(raw, ptr, end)));
  758. }
  759. break;
  760. default:
  761. if (end - ptr > 4 && isNormalizedGit(raw, ptr + 1, end)) {
  762. report(HAS_DOTGIT, id, String.format(
  763. JGitText.get().corruptObjectInvalidName,
  764. RawParseUtils.decode(raw, ptr, end)));
  765. }
  766. }
  767. } else if (isGitTilde1(raw, ptr, end)) {
  768. report(HAS_DOTGIT, id, String.format(
  769. JGitText.get().corruptObjectInvalidName,
  770. RawParseUtils.decode(raw, ptr, end)));
  771. }
  772. if (macosx && isMacHFSGit(raw, ptr, end, id)) {
  773. report(HAS_DOTGIT, id, String.format(
  774. JGitText.get().corruptObjectInvalidNameIgnorableUnicode,
  775. RawParseUtils.decode(raw, ptr, end)));
  776. }
  777. if (windows) {
  778. // Windows ignores space and dot at end of file name.
  779. if (raw[end - 1] == ' ' || raw[end - 1] == '.') {
  780. report(WIN32_BAD_NAME, id, String.format(
  781. JGitText.get().corruptObjectInvalidNameEnd,
  782. Character.valueOf(((char) raw[end - 1]))));
  783. }
  784. if (end - ptr >= 3) {
  785. checkNotWindowsDevice(raw, ptr, end, id);
  786. }
  787. }
  788. }
  789. // Mac's HFS+ folds permutations of ".git" and Unicode ignorable characters
  790. // to ".git" therefore we should prevent such names
  791. private boolean isMacHFSGit(byte[] raw, int ptr, int end,
  792. @Nullable AnyObjectId id) throws CorruptObjectException {
  793. boolean ignorable = false;
  794. byte[] git = new byte[] { '.', 'g', 'i', 't' };
  795. int g = 0;
  796. while (ptr < end) {
  797. switch (raw[ptr]) {
  798. case (byte) 0xe2: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192
  799. if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
  800. return false;
  801. }
  802. switch (raw[ptr + 1]) {
  803. case (byte) 0x80:
  804. switch (raw[ptr + 2]) {
  805. case (byte) 0x8c: // U+200C 0xe2808c ZERO WIDTH NON-JOINER
  806. case (byte) 0x8d: // U+200D 0xe2808d ZERO WIDTH JOINER
  807. case (byte) 0x8e: // U+200E 0xe2808e LEFT-TO-RIGHT MARK
  808. case (byte) 0x8f: // U+200F 0xe2808f RIGHT-TO-LEFT MARK
  809. case (byte) 0xaa: // U+202A 0xe280aa LEFT-TO-RIGHT EMBEDDING
  810. case (byte) 0xab: // U+202B 0xe280ab RIGHT-TO-LEFT EMBEDDING
  811. case (byte) 0xac: // U+202C 0xe280ac POP DIRECTIONAL FORMATTING
  812. case (byte) 0xad: // U+202D 0xe280ad LEFT-TO-RIGHT OVERRIDE
  813. case (byte) 0xae: // U+202E 0xe280ae RIGHT-TO-LEFT OVERRIDE
  814. ignorable = true;
  815. ptr += 3;
  816. continue;
  817. default:
  818. return false;
  819. }
  820. case (byte) 0x81:
  821. switch (raw[ptr + 2]) {
  822. case (byte) 0xaa: // U+206A 0xe281aa INHIBIT SYMMETRIC SWAPPING
  823. case (byte) 0xab: // U+206B 0xe281ab ACTIVATE SYMMETRIC SWAPPING
  824. case (byte) 0xac: // U+206C 0xe281ac INHIBIT ARABIC FORM SHAPING
  825. case (byte) 0xad: // U+206D 0xe281ad ACTIVATE ARABIC FORM SHAPING
  826. case (byte) 0xae: // U+206E 0xe281ae NATIONAL DIGIT SHAPES
  827. case (byte) 0xaf: // U+206F 0xe281af NOMINAL DIGIT SHAPES
  828. ignorable = true;
  829. ptr += 3;
  830. continue;
  831. default:
  832. return false;
  833. }
  834. default:
  835. return false;
  836. }
  837. case (byte) 0xef: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=65024
  838. if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
  839. return false;
  840. }
  841. // U+FEFF 0xefbbbf ZERO WIDTH NO-BREAK SPACE
  842. if ((raw[ptr + 1] == (byte) 0xbb)
  843. && (raw[ptr + 2] == (byte) 0xbf)) {
  844. ignorable = true;
  845. ptr += 3;
  846. continue;
  847. }
  848. return false;
  849. default:
  850. if (g == 4)
  851. return false;
  852. if (raw[ptr++] != git[g++])
  853. return false;
  854. }
  855. }
  856. if (g == 4 && ignorable)
  857. return true;
  858. return false;
  859. }
  860. private boolean checkTruncatedIgnorableUTF8(byte[] raw, int ptr, int end,
  861. @Nullable AnyObjectId id) throws CorruptObjectException {
  862. if ((ptr + 2) >= end) {
  863. report(BAD_UTF8, id, MessageFormat.format(
  864. JGitText.get().corruptObjectInvalidNameInvalidUtf8,
  865. toHexString(raw, ptr, end)));
  866. return false;
  867. }
  868. return true;
  869. }
  870. private static String toHexString(byte[] raw, int ptr, int end) {
  871. StringBuilder b = new StringBuilder("0x"); //$NON-NLS-1$
  872. for (int i = ptr; i < end; i++)
  873. b.append(String.format("%02x", Byte.valueOf(raw[i]))); //$NON-NLS-1$
  874. return b.toString();
  875. }
  876. private void checkNotWindowsDevice(byte[] raw, int ptr, int end,
  877. @Nullable AnyObjectId id) throws CorruptObjectException {
  878. switch (toLower(raw[ptr])) {
  879. case 'a': // AUX
  880. if (end - ptr >= 3
  881. && toLower(raw[ptr + 1]) == 'u'
  882. && toLower(raw[ptr + 2]) == 'x'
  883. && (end - ptr == 3 || raw[ptr + 3] == '.')) {
  884. report(WIN32_BAD_NAME, id,
  885. JGitText.get().corruptObjectInvalidNameAux);
  886. }
  887. break;
  888. case 'c': // CON, COM[1-9]
  889. if (end - ptr >= 3
  890. && toLower(raw[ptr + 2]) == 'n'
  891. && toLower(raw[ptr + 1]) == 'o'
  892. && (end - ptr == 3 || raw[ptr + 3] == '.')) {
  893. report(WIN32_BAD_NAME, id,
  894. JGitText.get().corruptObjectInvalidNameCon);
  895. }
  896. if (end - ptr >= 4
  897. && toLower(raw[ptr + 2]) == 'm'
  898. && toLower(raw[ptr + 1]) == 'o'
  899. && isPositiveDigit(raw[ptr + 3])
  900. && (end - ptr == 4 || raw[ptr + 4] == '.')) {
  901. report(WIN32_BAD_NAME, id, String.format(
  902. JGitText.get().corruptObjectInvalidNameCom,
  903. Character.valueOf(((char) raw[ptr + 3]))));
  904. }
  905. break;
  906. case 'l': // LPT[1-9]
  907. if (end - ptr >= 4
  908. && toLower(raw[ptr + 1]) == 'p'
  909. && toLower(raw[ptr + 2]) == 't'
  910. && isPositiveDigit(raw[ptr + 3])
  911. && (end - ptr == 4 || raw[ptr + 4] == '.')) {
  912. report(WIN32_BAD_NAME, id, String.format(
  913. JGitText.get().corruptObjectInvalidNameLpt,
  914. Character.valueOf(((char) raw[ptr + 3]))));
  915. }
  916. break;
  917. case 'n': // NUL
  918. if (end - ptr >= 3
  919. && toLower(raw[ptr + 1]) == 'u'
  920. && toLower(raw[ptr + 2]) == 'l'
  921. && (end - ptr == 3 || raw[ptr + 3] == '.')) {
  922. report(WIN32_BAD_NAME, id,
  923. JGitText.get().corruptObjectInvalidNameNul);
  924. }
  925. break;
  926. case 'p': // PRN
  927. if (end - ptr >= 3
  928. && toLower(raw[ptr + 1]) == 'r'
  929. && toLower(raw[ptr + 2]) == 'n'
  930. && (end - ptr == 3 || raw[ptr + 3] == '.')) {
  931. report(WIN32_BAD_NAME, id,
  932. JGitText.get().corruptObjectInvalidNamePrn);
  933. }
  934. break;
  935. }
  936. }
  937. private static boolean isInvalidOnWindows(byte c) {
  938. // Windows disallows "special" characters in a path component.
  939. switch (c) {
  940. case '"':
  941. case '*':
  942. case ':':
  943. case '<':
  944. case '>':
  945. case '?':
  946. case '\\':
  947. case '|':
  948. return true;
  949. }
  950. return 1 <= c && c <= 31;
  951. }
  952. private static boolean isGit(byte[] buf, int p) {
  953. return toLower(buf[p]) == 'g'
  954. && toLower(buf[p + 1]) == 'i'
  955. && toLower(buf[p + 2]) == 't';
  956. }
  957. private static boolean isGitTilde1(byte[] buf, int p, int end) {
  958. if (end - p != 5)
  959. return false;
  960. return toLower(buf[p]) == 'g' && toLower(buf[p + 1]) == 'i'
  961. && toLower(buf[p + 2]) == 't' && buf[p + 3] == '~'
  962. && buf[p + 4] == '1';
  963. }
  964. private static boolean isNormalizedGit(byte[] raw, int ptr, int end) {
  965. if (isGit(raw, ptr)) {
  966. int dots = 0;
  967. boolean space = false;
  968. int p = end - 1;
  969. for (; (ptr + 2) < p; p--) {
  970. if (raw[p] == '.')
  971. dots++;
  972. else if (raw[p] == ' ')
  973. space = true;
  974. else
  975. break;
  976. }
  977. return p == ptr + 2 && (dots == 1 || space);
  978. }
  979. return false;
  980. }
  981. private boolean match(byte[] b, byte[] src) {
  982. int r = RawParseUtils.match(b, bufPtr.value, src);
  983. if (r < 0) {
  984. return false;
  985. }
  986. bufPtr.value = r;
  987. return true;
  988. }
  989. private static char toLower(byte b) {
  990. if ('A' <= b && b <= 'Z')
  991. return (char) (b + ('a' - 'A'));
  992. return (char) b;
  993. }
  994. private static boolean isPositiveDigit(byte b) {
  995. return '1' <= b && b <= '9';
  996. }
  997. /**
  998. * Check a blob for errors.
  999. *
  1000. * @param raw
  1001. * the blob data. The array is never modified.
  1002. * @throws CorruptObjectException
  1003. * if any error was detected.
  1004. */
  1005. public void checkBlob(final byte[] raw) throws CorruptObjectException {
  1006. // We can always assume the blob is valid.
  1007. }
  1008. private String normalize(byte[] raw, int ptr, int end) {
  1009. String n = RawParseUtils.decode(raw, ptr, end).toLowerCase(Locale.US);
  1010. return macosx ? Normalizer.normalize(n, Normalizer.Form.NFC) : n;
  1011. }
  1012. }