You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ObjectChecker.java 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084
  1. /*
  2. * Copyright (C) 2008-2010, Google Inc.
  3. * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.lib;
  45. import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
  46. import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH;
  47. import static org.eclipse.jgit.lib.Constants.OBJ_BAD;
  48. import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
  49. import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
  50. import static org.eclipse.jgit.lib.Constants.OBJ_TAG;
  51. import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
  52. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_DATE;
  53. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_EMAIL;
  54. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_OBJECT_SHA1;
  55. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_PARENT_SHA1;
  56. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TIMEZONE;
  57. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TREE_SHA1;
  58. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_UTF8;
  59. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.DUPLICATE_ENTRIES;
  60. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.EMPTY_NAME;
  61. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.FULL_PATHNAME;
  62. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOT;
  63. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTDOT;
  64. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTGIT;
  65. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_AUTHOR;
  66. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_COMMITTER;
  67. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_EMAIL;
  68. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_OBJECT;
  69. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_SPACE_BEFORE_DATE;
  70. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TAG_ENTRY;
  71. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TREE;
  72. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TYPE_ENTRY;
  73. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.NULL_SHA1;
  74. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.TREE_NOT_SORTED;
  75. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.UNKNOWN_TYPE;
  76. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.WIN32_BAD_NAME;
  77. import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.ZERO_PADDED_FILEMODE;
  78. import static org.eclipse.jgit.util.Paths.compare;
  79. import static org.eclipse.jgit.util.Paths.compareSameName;
  80. import static org.eclipse.jgit.util.RawParseUtils.nextLF;
  81. import static org.eclipse.jgit.util.RawParseUtils.parseBase10;
  82. import java.text.MessageFormat;
  83. import java.text.Normalizer;
  84. import java.util.EnumSet;
  85. import java.util.HashSet;
  86. import java.util.Locale;
  87. import java.util.Set;
  88. import org.eclipse.jgit.annotations.NonNull;
  89. import org.eclipse.jgit.annotations.Nullable;
  90. import org.eclipse.jgit.errors.CorruptObjectException;
  91. import org.eclipse.jgit.internal.JGitText;
  92. import org.eclipse.jgit.util.MutableInteger;
  93. import org.eclipse.jgit.util.RawParseUtils;
  94. import org.eclipse.jgit.util.StringUtils;
  95. /**
  96. * Verifies that an object is formatted correctly.
  97. * <p>
  98. * Verifications made by this class only check that the fields of an object are
  99. * formatted correctly. The ObjectId checksum of the object is not verified, and
  100. * connectivity links between objects are also not verified. It's assumed that
  101. * the caller can provide both of these validations on its own.
  102. * <p>
  103. * Instances of this class are not thread safe, but they may be reused to
  104. * perform multiple object validations.
  105. */
  106. public class ObjectChecker {
  107. /** Header "tree " */
  108. public static final byte[] tree = Constants.encodeASCII("tree "); //$NON-NLS-1$
  109. /** Header "parent " */
  110. public static final byte[] parent = Constants.encodeASCII("parent "); //$NON-NLS-1$
  111. /** Header "author " */
  112. public static final byte[] author = Constants.encodeASCII("author "); //$NON-NLS-1$
  113. /** Header "committer " */
  114. public static final byte[] committer = Constants.encodeASCII("committer "); //$NON-NLS-1$
  115. /** Header "encoding " */
  116. public static final byte[] encoding = Constants.encodeASCII("encoding "); //$NON-NLS-1$
  117. /** Header "object " */
  118. public static final byte[] object = Constants.encodeASCII("object "); //$NON-NLS-1$
  119. /** Header "type " */
  120. public static final byte[] type = Constants.encodeASCII("type "); //$NON-NLS-1$
  121. /** Header "tag " */
  122. public static final byte[] tag = Constants.encodeASCII("tag "); //$NON-NLS-1$
  123. /** Header "tagger " */
  124. public static final byte[] tagger = Constants.encodeASCII("tagger "); //$NON-NLS-1$
  125. /**
  126. * Potential issues identified by the checker.
  127. *
  128. * @since 4.2
  129. */
  130. public enum ErrorType {
  131. // @formatter:off
  132. // These names match git-core so that fsck section keys also match.
  133. /***/ NULL_SHA1,
  134. /***/ DUPLICATE_ENTRIES,
  135. /***/ TREE_NOT_SORTED,
  136. /***/ ZERO_PADDED_FILEMODE,
  137. /***/ EMPTY_NAME,
  138. /***/ FULL_PATHNAME,
  139. /***/ HAS_DOT,
  140. /***/ HAS_DOTDOT,
  141. /***/ HAS_DOTGIT,
  142. /***/ BAD_OBJECT_SHA1,
  143. /***/ BAD_PARENT_SHA1,
  144. /***/ BAD_TREE_SHA1,
  145. /***/ MISSING_AUTHOR,
  146. /***/ MISSING_COMMITTER,
  147. /***/ MISSING_OBJECT,
  148. /***/ MISSING_TREE,
  149. /***/ MISSING_TYPE_ENTRY,
  150. /***/ MISSING_TAG_ENTRY,
  151. /***/ BAD_DATE,
  152. /***/ BAD_EMAIL,
  153. /***/ BAD_TIMEZONE,
  154. /***/ MISSING_EMAIL,
  155. /***/ MISSING_SPACE_BEFORE_DATE,
  156. /***/ UNKNOWN_TYPE,
  157. // These are unique to JGit.
  158. /***/ WIN32_BAD_NAME,
  159. /***/ BAD_UTF8;
  160. // @formatter:on
  161. /** @return camelCaseVersion of the name. */
  162. public String getMessageId() {
  163. String n = name();
  164. StringBuilder r = new StringBuilder(n.length());
  165. for (int i = 0; i < n.length(); i++) {
  166. char c = n.charAt(i);
  167. if (c != '_') {
  168. r.append(StringUtils.toLowerCase(c));
  169. } else {
  170. r.append(n.charAt(++i));
  171. }
  172. }
  173. return r.toString();
  174. }
  175. }
  176. private final MutableObjectId tempId = new MutableObjectId();
  177. private final MutableInteger bufPtr = new MutableInteger();
  178. private EnumSet<ErrorType> errors = EnumSet.allOf(ErrorType.class);
  179. private ObjectIdSet skipList;
  180. private boolean allowInvalidPersonIdent;
  181. private boolean windows;
  182. private boolean macosx;
  183. /**
  184. * Enable accepting specific malformed (but not horribly broken) objects.
  185. *
  186. * @param objects
  187. * collection of object names known to be broken in a non-fatal
  188. * way that should be ignored by the checker.
  189. * @return {@code this}
  190. * @since 4.2
  191. */
  192. public ObjectChecker setSkipList(@Nullable ObjectIdSet objects) {
  193. skipList = objects;
  194. return this;
  195. }
  196. /**
  197. * Configure error types to be ignored across all objects.
  198. *
  199. * @param ids
  200. * error types to ignore. The caller's set is copied.
  201. * @return {@code this}
  202. * @since 4.2
  203. */
  204. public ObjectChecker setIgnore(@Nullable Set<ErrorType> ids) {
  205. errors = EnumSet.allOf(ErrorType.class);
  206. if (ids != null) {
  207. errors.removeAll(ids);
  208. }
  209. return this;
  210. }
  211. /**
  212. * Add message type to be ignored across all objects.
  213. *
  214. * @param id
  215. * error type to ignore.
  216. * @param ignore
  217. * true to ignore this error; false to treat the error as an
  218. * error and throw.
  219. * @return {@code this}
  220. * @since 4.2
  221. */
  222. public ObjectChecker setIgnore(ErrorType id, boolean ignore) {
  223. if (ignore) {
  224. errors.remove(id);
  225. } else {
  226. errors.add(id);
  227. }
  228. return this;
  229. }
  230. /**
  231. * Enable accepting leading zero mode in tree entries.
  232. * <p>
  233. * Some broken Git libraries generated leading zeros in the mode part of
  234. * tree entries. This is technically incorrect but gracefully allowed by
  235. * git-core. JGit rejects such trees by default, but may need to accept
  236. * them on broken histories.
  237. * <p>
  238. * Same as {@code setIgnore(ZERO_PADDED_FILEMODE, allow)}.
  239. *
  240. * @param allow allow leading zero mode.
  241. * @return {@code this}.
  242. * @since 3.4
  243. */
  244. public ObjectChecker setAllowLeadingZeroFileMode(boolean allow) {
  245. return setIgnore(ZERO_PADDED_FILEMODE, allow);
  246. }
  247. /**
  248. * Enable accepting invalid author, committer and tagger identities.
  249. * <p>
  250. * Some broken Git versions/libraries allowed users to create commits and
  251. * tags with invalid formatting between the name, email and timestamp.
  252. *
  253. * @param allow
  254. * if true accept invalid person identity strings.
  255. * @return {@code this}.
  256. * @since 4.0
  257. */
  258. public ObjectChecker setAllowInvalidPersonIdent(boolean allow) {
  259. allowInvalidPersonIdent = allow;
  260. return this;
  261. }
  262. /**
  263. * Restrict trees to only names legal on Windows platforms.
  264. * <p>
  265. * Also rejects any mixed case forms of reserved names ({@code .git}).
  266. *
  267. * @param win true if Windows name checking should be performed.
  268. * @return {@code this}.
  269. * @since 3.4
  270. */
  271. public ObjectChecker setSafeForWindows(boolean win) {
  272. windows = win;
  273. return this;
  274. }
  275. /**
  276. * Restrict trees to only names legal on Mac OS X platforms.
  277. * <p>
  278. * Rejects any mixed case forms of reserved names ({@code .git})
  279. * for users working on HFS+ in case-insensitive (default) mode.
  280. *
  281. * @param mac true if Mac OS X name checking should be performed.
  282. * @return {@code this}.
  283. * @since 3.4
  284. */
  285. public ObjectChecker setSafeForMacOS(boolean mac) {
  286. macosx = mac;
  287. return this;
  288. }
  289. /**
  290. * Check an object for parsing errors.
  291. *
  292. * @param objType
  293. * type of the object. Must be a valid object type code in
  294. * {@link Constants}.
  295. * @param raw
  296. * the raw data which comprises the object. This should be in the
  297. * canonical format (that is the format used to generate the
  298. * ObjectId of the object). The array is never modified.
  299. * @throws CorruptObjectException
  300. * if an error is identified.
  301. */
  302. public void check(int objType, byte[] raw)
  303. throws CorruptObjectException {
  304. check(idFor(objType, raw), objType, raw);
  305. }
  306. /**
  307. * Check an object for parsing errors.
  308. *
  309. * @param id
  310. * identify of the object being checked.
  311. * @param objType
  312. * type of the object. Must be a valid object type code in
  313. * {@link Constants}.
  314. * @param raw
  315. * the raw data which comprises the object. This should be in the
  316. * canonical format (that is the format used to generate the
  317. * ObjectId of the object). The array is never modified.
  318. * @throws CorruptObjectException
  319. * if an error is identified.
  320. * @since 4.2
  321. */
  322. public void check(@Nullable AnyObjectId id, int objType, byte[] raw)
  323. throws CorruptObjectException {
  324. switch (objType) {
  325. case OBJ_COMMIT:
  326. checkCommit(id, raw);
  327. break;
  328. case OBJ_TAG:
  329. checkTag(id, raw);
  330. break;
  331. case OBJ_TREE:
  332. checkTree(id, raw);
  333. break;
  334. case OBJ_BLOB:
  335. checkBlob(raw);
  336. break;
  337. default:
  338. report(UNKNOWN_TYPE, id, MessageFormat.format(
  339. JGitText.get().corruptObjectInvalidType2,
  340. Integer.valueOf(objType)));
  341. }
  342. }
  343. private boolean checkId(byte[] raw) {
  344. int p = bufPtr.value;
  345. try {
  346. tempId.fromString(raw, p);
  347. } catch (IllegalArgumentException e) {
  348. bufPtr.value = nextLF(raw, p);
  349. return false;
  350. }
  351. p += OBJECT_ID_STRING_LENGTH;
  352. if (raw[p] == '\n') {
  353. bufPtr.value = p + 1;
  354. return true;
  355. }
  356. bufPtr.value = nextLF(raw, p);
  357. return false;
  358. }
  359. private void checkPersonIdent(byte[] raw, @Nullable AnyObjectId id)
  360. throws CorruptObjectException {
  361. if (allowInvalidPersonIdent) {
  362. bufPtr.value = nextLF(raw, bufPtr.value);
  363. return;
  364. }
  365. final int emailB = nextLF(raw, bufPtr.value, '<');
  366. if (emailB == bufPtr.value || raw[emailB - 1] != '<') {
  367. report(MISSING_EMAIL, id, JGitText.get().corruptObjectMissingEmail);
  368. bufPtr.value = nextLF(raw, bufPtr.value);
  369. return;
  370. }
  371. final int emailE = nextLF(raw, emailB, '>');
  372. if (emailE == emailB || raw[emailE - 1] != '>') {
  373. report(BAD_EMAIL, id, JGitText.get().corruptObjectBadEmail);
  374. bufPtr.value = nextLF(raw, bufPtr.value);
  375. return;
  376. }
  377. if (emailE == raw.length || raw[emailE] != ' ') {
  378. report(MISSING_SPACE_BEFORE_DATE, id,
  379. JGitText.get().corruptObjectBadDate);
  380. bufPtr.value = nextLF(raw, bufPtr.value);
  381. return;
  382. }
  383. parseBase10(raw, emailE + 1, bufPtr); // when
  384. if (emailE + 1 == bufPtr.value || bufPtr.value == raw.length
  385. || raw[bufPtr.value] != ' ') {
  386. report(BAD_DATE, id, JGitText.get().corruptObjectBadDate);
  387. bufPtr.value = nextLF(raw, bufPtr.value);
  388. return;
  389. }
  390. int p = bufPtr.value + 1;
  391. parseBase10(raw, p, bufPtr); // tz offset
  392. if (p == bufPtr.value) {
  393. report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
  394. bufPtr.value = nextLF(raw, bufPtr.value);
  395. return;
  396. }
  397. p = bufPtr.value;
  398. if (raw[p] == '\n') {
  399. bufPtr.value = p + 1;
  400. } else {
  401. report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
  402. bufPtr.value = nextLF(raw, p);
  403. }
  404. }
  405. /**
  406. * Check a commit for errors.
  407. *
  408. * @param raw
  409. * the commit data. The array is never modified.
  410. * @throws CorruptObjectException
  411. * if any error was detected.
  412. */
  413. public void checkCommit(byte[] raw) throws CorruptObjectException {
  414. checkCommit(idFor(OBJ_COMMIT, raw), raw);
  415. }
  416. /**
  417. * Check a commit for errors.
  418. *
  419. * @param id
  420. * identity of the object being checked.
  421. * @param raw
  422. * the commit data. The array is never modified.
  423. * @throws CorruptObjectException
  424. * if any error was detected.
  425. * @since 4.2
  426. */
  427. public void checkCommit(@Nullable AnyObjectId id, byte[] raw)
  428. throws CorruptObjectException {
  429. bufPtr.value = 0;
  430. if (!match(raw, tree)) {
  431. report(MISSING_TREE, id, JGitText.get().corruptObjectNotreeHeader);
  432. } else if (!checkId(raw)) {
  433. report(BAD_TREE_SHA1, id, JGitText.get().corruptObjectInvalidTree);
  434. }
  435. while (match(raw, parent)) {
  436. if (!checkId(raw)) {
  437. report(BAD_PARENT_SHA1, id,
  438. JGitText.get().corruptObjectInvalidParent);
  439. }
  440. }
  441. if (match(raw, author)) {
  442. checkPersonIdent(raw, id);
  443. } else {
  444. report(MISSING_AUTHOR, id, JGitText.get().corruptObjectNoAuthor);
  445. }
  446. if (match(raw, committer)) {
  447. checkPersonIdent(raw, id);
  448. } else {
  449. report(MISSING_COMMITTER, id,
  450. JGitText.get().corruptObjectNoCommitter);
  451. }
  452. }
  453. /**
  454. * Check an annotated tag for errors.
  455. *
  456. * @param raw
  457. * the tag data. The array is never modified.
  458. * @throws CorruptObjectException
  459. * if any error was detected.
  460. */
  461. public void checkTag(byte[] raw) throws CorruptObjectException {
  462. checkTag(idFor(OBJ_TAG, raw), raw);
  463. }
  464. /**
  465. * Check an annotated tag for errors.
  466. *
  467. * @param id
  468. * identity of the object being checked.
  469. * @param raw
  470. * the tag data. The array is never modified.
  471. * @throws CorruptObjectException
  472. * if any error was detected.
  473. * @since 4.2
  474. */
  475. public void checkTag(@Nullable AnyObjectId id, byte[] raw)
  476. throws CorruptObjectException {
  477. bufPtr.value = 0;
  478. if (!match(raw, object)) {
  479. report(MISSING_OBJECT, id,
  480. JGitText.get().corruptObjectNoObjectHeader);
  481. } else if (!checkId(raw)) {
  482. report(BAD_OBJECT_SHA1, id,
  483. JGitText.get().corruptObjectInvalidObject);
  484. }
  485. if (!match(raw, type)) {
  486. report(MISSING_TYPE_ENTRY, id,
  487. JGitText.get().corruptObjectNoTypeHeader);
  488. }
  489. bufPtr.value = nextLF(raw, bufPtr.value);
  490. if (!match(raw, tag)) {
  491. report(MISSING_TAG_ENTRY, id,
  492. JGitText.get().corruptObjectNoTagHeader);
  493. }
  494. bufPtr.value = nextLF(raw, bufPtr.value);
  495. if (match(raw, tagger)) {
  496. checkPersonIdent(raw, id);
  497. }
  498. }
  499. private static boolean duplicateName(final byte[] raw,
  500. final int thisNamePos, final int thisNameEnd) {
  501. final int sz = raw.length;
  502. int nextPtr = thisNameEnd + 1 + Constants.OBJECT_ID_LENGTH;
  503. for (;;) {
  504. int nextMode = 0;
  505. for (;;) {
  506. if (nextPtr >= sz)
  507. return false;
  508. final byte c = raw[nextPtr++];
  509. if (' ' == c)
  510. break;
  511. nextMode <<= 3;
  512. nextMode += c - '0';
  513. }
  514. final int nextNamePos = nextPtr;
  515. for (;;) {
  516. if (nextPtr == sz)
  517. return false;
  518. final byte c = raw[nextPtr++];
  519. if (c == 0)
  520. break;
  521. }
  522. if (nextNamePos + 1 == nextPtr)
  523. return false;
  524. int cmp = compareSameName(
  525. raw, thisNamePos, thisNameEnd,
  526. raw, nextNamePos, nextPtr - 1, nextMode);
  527. if (cmp < 0)
  528. return false;
  529. else if (cmp == 0)
  530. return true;
  531. nextPtr += Constants.OBJECT_ID_LENGTH;
  532. }
  533. }
  534. /**
  535. * Check a canonical formatted tree for errors.
  536. *
  537. * @param raw
  538. * the raw tree data. The array is never modified.
  539. * @throws CorruptObjectException
  540. * if any error was detected.
  541. */
  542. public void checkTree(byte[] raw) throws CorruptObjectException {
  543. checkTree(idFor(OBJ_TREE, raw), raw);
  544. }
  545. /**
  546. * Check a canonical formatted tree for errors.
  547. *
  548. * @param id
  549. * identity of the object being checked.
  550. * @param raw
  551. * the raw tree data. The array is never modified.
  552. * @throws CorruptObjectException
  553. * if any error was detected.
  554. * @since 4.2
  555. */
  556. public void checkTree(@Nullable AnyObjectId id, byte[] raw)
  557. throws CorruptObjectException {
  558. final int sz = raw.length;
  559. int ptr = 0;
  560. int lastNameB = 0, lastNameE = 0, lastMode = 0;
  561. Set<String> normalized = windows || macosx
  562. ? new HashSet<String>()
  563. : null;
  564. while (ptr < sz) {
  565. int thisMode = 0;
  566. for (;;) {
  567. if (ptr == sz) {
  568. throw new CorruptObjectException(
  569. JGitText.get().corruptObjectTruncatedInMode);
  570. }
  571. final byte c = raw[ptr++];
  572. if (' ' == c)
  573. break;
  574. if (c < '0' || c > '7') {
  575. throw new CorruptObjectException(
  576. JGitText.get().corruptObjectInvalidModeChar);
  577. }
  578. if (thisMode == 0 && c == '0') {
  579. report(ZERO_PADDED_FILEMODE, id,
  580. JGitText.get().corruptObjectInvalidModeStartsZero);
  581. }
  582. thisMode <<= 3;
  583. thisMode += c - '0';
  584. }
  585. if (FileMode.fromBits(thisMode).getObjectType() == OBJ_BAD) {
  586. throw new CorruptObjectException(MessageFormat.format(
  587. JGitText.get().corruptObjectInvalidMode2,
  588. Integer.valueOf(thisMode)));
  589. }
  590. final int thisNameB = ptr;
  591. ptr = scanPathSegment(raw, ptr, sz, id);
  592. if (ptr == sz || raw[ptr] != 0) {
  593. throw new CorruptObjectException(
  594. JGitText.get().corruptObjectTruncatedInName);
  595. }
  596. checkPathSegment2(raw, thisNameB, ptr, id);
  597. if (normalized != null) {
  598. if (!normalized.add(normalize(raw, thisNameB, ptr))) {
  599. report(DUPLICATE_ENTRIES, id,
  600. JGitText.get().corruptObjectDuplicateEntryNames);
  601. }
  602. } else if (duplicateName(raw, thisNameB, ptr)) {
  603. report(DUPLICATE_ENTRIES, id,
  604. JGitText.get().corruptObjectDuplicateEntryNames);
  605. }
  606. if (lastNameB != 0) {
  607. int cmp = compare(
  608. raw, lastNameB, lastNameE, lastMode,
  609. raw, thisNameB, ptr, thisMode);
  610. if (cmp > 0) {
  611. report(TREE_NOT_SORTED, id,
  612. JGitText.get().corruptObjectIncorrectSorting);
  613. }
  614. }
  615. lastNameB = thisNameB;
  616. lastNameE = ptr;
  617. lastMode = thisMode;
  618. ptr += 1 + OBJECT_ID_LENGTH;
  619. if (ptr > sz) {
  620. throw new CorruptObjectException(
  621. JGitText.get().corruptObjectTruncatedInObjectId);
  622. }
  623. if (ObjectId.zeroId().compareTo(raw, ptr - OBJECT_ID_LENGTH) == 0) {
  624. report(NULL_SHA1, id, JGitText.get().corruptObjectZeroId);
  625. }
  626. }
  627. }
  628. private int scanPathSegment(byte[] raw, int ptr, int end,
  629. @Nullable AnyObjectId id) throws CorruptObjectException {
  630. for (; ptr < end; ptr++) {
  631. byte c = raw[ptr];
  632. if (c == 0) {
  633. return ptr;
  634. }
  635. if (c == '/') {
  636. report(FULL_PATHNAME, id,
  637. JGitText.get().corruptObjectNameContainsSlash);
  638. }
  639. if (windows && isInvalidOnWindows(c)) {
  640. if (c > 31) {
  641. throw new CorruptObjectException(String.format(
  642. JGitText.get().corruptObjectNameContainsChar,
  643. Byte.valueOf(c)));
  644. }
  645. throw new CorruptObjectException(String.format(
  646. JGitText.get().corruptObjectNameContainsByte,
  647. Integer.valueOf(c & 0xff)));
  648. }
  649. }
  650. return ptr;
  651. }
  652. @SuppressWarnings("resource")
  653. @Nullable
  654. private ObjectId idFor(int objType, byte[] raw) {
  655. if (skipList != null) {
  656. return new ObjectInserter.Formatter().idFor(objType, raw);
  657. }
  658. return null;
  659. }
  660. private void report(@NonNull ErrorType err, @Nullable AnyObjectId id,
  661. String why) throws CorruptObjectException {
  662. if (errors.contains(err)
  663. && (id == null || skipList == null || !skipList.contains(id))) {
  664. if (id != null) {
  665. throw new CorruptObjectException(err, id, why);
  666. }
  667. throw new CorruptObjectException(why);
  668. }
  669. }
  670. /**
  671. * Check tree path entry for validity.
  672. * <p>
  673. * Unlike {@link #checkPathSegment(byte[], int, int)}, this version
  674. * scans a multi-directory path string such as {@code "src/main.c"}.
  675. *
  676. * @param path path string to scan.
  677. * @throws CorruptObjectException path is invalid.
  678. * @since 3.6
  679. */
  680. public void checkPath(String path) throws CorruptObjectException {
  681. byte[] buf = Constants.encode(path);
  682. checkPath(buf, 0, buf.length);
  683. }
  684. /**
  685. * Check tree path entry for validity.
  686. * <p>
  687. * Unlike {@link #checkPathSegment(byte[], int, int)}, this version
  688. * scans a multi-directory path string such as {@code "src/main.c"}.
  689. *
  690. * @param raw buffer to scan.
  691. * @param ptr offset to first byte of the name.
  692. * @param end offset to one past last byte of name.
  693. * @throws CorruptObjectException path is invalid.
  694. * @since 3.6
  695. */
  696. public void checkPath(byte[] raw, int ptr, int end)
  697. throws CorruptObjectException {
  698. int start = ptr;
  699. for (; ptr < end; ptr++) {
  700. if (raw[ptr] == '/') {
  701. checkPathSegment(raw, start, ptr);
  702. start = ptr + 1;
  703. }
  704. }
  705. checkPathSegment(raw, start, end);
  706. }
  707. /**
  708. * Check tree path entry for validity.
  709. *
  710. * @param raw buffer to scan.
  711. * @param ptr offset to first byte of the name.
  712. * @param end offset to one past last byte of name.
  713. * @throws CorruptObjectException name is invalid.
  714. * @since 3.4
  715. */
  716. public void checkPathSegment(byte[] raw, int ptr, int end)
  717. throws CorruptObjectException {
  718. int e = scanPathSegment(raw, ptr, end, null);
  719. if (e < end && raw[e] == 0)
  720. throw new CorruptObjectException(
  721. JGitText.get().corruptObjectNameContainsNullByte);
  722. checkPathSegment2(raw, ptr, end, null);
  723. }
  724. private void checkPathSegment2(byte[] raw, int ptr, int end,
  725. @Nullable AnyObjectId id) throws CorruptObjectException {
  726. if (ptr == end) {
  727. report(EMPTY_NAME, id, JGitText.get().corruptObjectNameZeroLength);
  728. return;
  729. }
  730. if (raw[ptr] == '.') {
  731. switch (end - ptr) {
  732. case 1:
  733. report(HAS_DOT, id, JGitText.get().corruptObjectNameDot);
  734. break;
  735. case 2:
  736. if (raw[ptr + 1] == '.') {
  737. report(HAS_DOTDOT, id,
  738. JGitText.get().corruptObjectNameDotDot);
  739. }
  740. break;
  741. case 4:
  742. if (isGit(raw, ptr + 1)) {
  743. report(HAS_DOTGIT, id, String.format(
  744. JGitText.get().corruptObjectInvalidName,
  745. RawParseUtils.decode(raw, ptr, end)));
  746. }
  747. break;
  748. default:
  749. if (end - ptr > 4 && isNormalizedGit(raw, ptr + 1, end)) {
  750. report(HAS_DOTGIT, id, String.format(
  751. JGitText.get().corruptObjectInvalidName,
  752. RawParseUtils.decode(raw, ptr, end)));
  753. }
  754. }
  755. } else if (isGitTilde1(raw, ptr, end)) {
  756. report(HAS_DOTGIT, id, String.format(
  757. JGitText.get().corruptObjectInvalidName,
  758. RawParseUtils.decode(raw, ptr, end)));
  759. }
  760. if (macosx && isMacHFSGit(raw, ptr, end, id)) {
  761. report(HAS_DOTGIT, id, String.format(
  762. JGitText.get().corruptObjectInvalidNameIgnorableUnicode,
  763. RawParseUtils.decode(raw, ptr, end)));
  764. }
  765. if (windows) {
  766. // Windows ignores space and dot at end of file name.
  767. if (raw[end - 1] == ' ' || raw[end - 1] == '.') {
  768. report(WIN32_BAD_NAME, id, String.format(
  769. JGitText.get().corruptObjectInvalidNameEnd,
  770. Character.valueOf(((char) raw[end - 1]))));
  771. }
  772. if (end - ptr >= 3) {
  773. checkNotWindowsDevice(raw, ptr, end, id);
  774. }
  775. }
  776. }
  777. // Mac's HFS+ folds permutations of ".git" and Unicode ignorable characters
  778. // to ".git" therefore we should prevent such names
  779. private boolean isMacHFSGit(byte[] raw, int ptr, int end,
  780. @Nullable AnyObjectId id) throws CorruptObjectException {
  781. boolean ignorable = false;
  782. byte[] git = new byte[] { '.', 'g', 'i', 't' };
  783. int g = 0;
  784. while (ptr < end) {
  785. switch (raw[ptr]) {
  786. case (byte) 0xe2: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192
  787. if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
  788. return false;
  789. }
  790. switch (raw[ptr + 1]) {
  791. case (byte) 0x80:
  792. switch (raw[ptr + 2]) {
  793. case (byte) 0x8c: // U+200C 0xe2808c ZERO WIDTH NON-JOINER
  794. case (byte) 0x8d: // U+200D 0xe2808d ZERO WIDTH JOINER
  795. case (byte) 0x8e: // U+200E 0xe2808e LEFT-TO-RIGHT MARK
  796. case (byte) 0x8f: // U+200F 0xe2808f RIGHT-TO-LEFT MARK
  797. case (byte) 0xaa: // U+202A 0xe280aa LEFT-TO-RIGHT EMBEDDING
  798. case (byte) 0xab: // U+202B 0xe280ab RIGHT-TO-LEFT EMBEDDING
  799. case (byte) 0xac: // U+202C 0xe280ac POP DIRECTIONAL FORMATTING
  800. case (byte) 0xad: // U+202D 0xe280ad LEFT-TO-RIGHT OVERRIDE
  801. case (byte) 0xae: // U+202E 0xe280ae RIGHT-TO-LEFT OVERRIDE
  802. ignorable = true;
  803. ptr += 3;
  804. continue;
  805. default:
  806. return false;
  807. }
  808. case (byte) 0x81:
  809. switch (raw[ptr + 2]) {
  810. case (byte) 0xaa: // U+206A 0xe281aa INHIBIT SYMMETRIC SWAPPING
  811. case (byte) 0xab: // U+206B 0xe281ab ACTIVATE SYMMETRIC SWAPPING
  812. case (byte) 0xac: // U+206C 0xe281ac INHIBIT ARABIC FORM SHAPING
  813. case (byte) 0xad: // U+206D 0xe281ad ACTIVATE ARABIC FORM SHAPING
  814. case (byte) 0xae: // U+206E 0xe281ae NATIONAL DIGIT SHAPES
  815. case (byte) 0xaf: // U+206F 0xe281af NOMINAL DIGIT SHAPES
  816. ignorable = true;
  817. ptr += 3;
  818. continue;
  819. default:
  820. return false;
  821. }
  822. default:
  823. return false;
  824. }
  825. case (byte) 0xef: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=65024
  826. if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
  827. return false;
  828. }
  829. // U+FEFF 0xefbbbf ZERO WIDTH NO-BREAK SPACE
  830. if ((raw[ptr + 1] == (byte) 0xbb)
  831. && (raw[ptr + 2] == (byte) 0xbf)) {
  832. ignorable = true;
  833. ptr += 3;
  834. continue;
  835. }
  836. return false;
  837. default:
  838. if (g == 4)
  839. return false;
  840. if (raw[ptr++] != git[g++])
  841. return false;
  842. }
  843. }
  844. if (g == 4 && ignorable)
  845. return true;
  846. return false;
  847. }
  848. private boolean checkTruncatedIgnorableUTF8(byte[] raw, int ptr, int end,
  849. @Nullable AnyObjectId id) throws CorruptObjectException {
  850. if ((ptr + 2) >= end) {
  851. report(BAD_UTF8, id, MessageFormat.format(
  852. JGitText.get().corruptObjectInvalidNameInvalidUtf8,
  853. toHexString(raw, ptr, end)));
  854. return false;
  855. }
  856. return true;
  857. }
  858. private static String toHexString(byte[] raw, int ptr, int end) {
  859. StringBuilder b = new StringBuilder("0x"); //$NON-NLS-1$
  860. for (int i = ptr; i < end; i++)
  861. b.append(String.format("%02x", Byte.valueOf(raw[i]))); //$NON-NLS-1$
  862. return b.toString();
  863. }
  864. private void checkNotWindowsDevice(byte[] raw, int ptr, int end,
  865. @Nullable AnyObjectId id) throws CorruptObjectException {
  866. switch (toLower(raw[ptr])) {
  867. case 'a': // AUX
  868. if (end - ptr >= 3
  869. && toLower(raw[ptr + 1]) == 'u'
  870. && toLower(raw[ptr + 2]) == 'x'
  871. && (end - ptr == 3 || raw[ptr + 3] == '.')) {
  872. report(WIN32_BAD_NAME, id,
  873. JGitText.get().corruptObjectInvalidNameAux);
  874. }
  875. break;
  876. case 'c': // CON, COM[1-9]
  877. if (end - ptr >= 3
  878. && toLower(raw[ptr + 2]) == 'n'
  879. && toLower(raw[ptr + 1]) == 'o'
  880. && (end - ptr == 3 || raw[ptr + 3] == '.')) {
  881. report(WIN32_BAD_NAME, id,
  882. JGitText.get().corruptObjectInvalidNameCon);
  883. }
  884. if (end - ptr >= 4
  885. && toLower(raw[ptr + 2]) == 'm'
  886. && toLower(raw[ptr + 1]) == 'o'
  887. && isPositiveDigit(raw[ptr + 3])
  888. && (end - ptr == 4 || raw[ptr + 4] == '.')) {
  889. report(WIN32_BAD_NAME, id, String.format(
  890. JGitText.get().corruptObjectInvalidNameCom,
  891. Character.valueOf(((char) raw[ptr + 3]))));
  892. }
  893. break;
  894. case 'l': // LPT[1-9]
  895. if (end - ptr >= 4
  896. && toLower(raw[ptr + 1]) == 'p'
  897. && toLower(raw[ptr + 2]) == 't'
  898. && isPositiveDigit(raw[ptr + 3])
  899. && (end - ptr == 4 || raw[ptr + 4] == '.')) {
  900. report(WIN32_BAD_NAME, id, String.format(
  901. JGitText.get().corruptObjectInvalidNameLpt,
  902. Character.valueOf(((char) raw[ptr + 3]))));
  903. }
  904. break;
  905. case 'n': // NUL
  906. if (end - ptr >= 3
  907. && toLower(raw[ptr + 1]) == 'u'
  908. && toLower(raw[ptr + 2]) == 'l'
  909. && (end - ptr == 3 || raw[ptr + 3] == '.')) {
  910. report(WIN32_BAD_NAME, id,
  911. JGitText.get().corruptObjectInvalidNameNul);
  912. }
  913. break;
  914. case 'p': // PRN
  915. if (end - ptr >= 3
  916. && toLower(raw[ptr + 1]) == 'r'
  917. && toLower(raw[ptr + 2]) == 'n'
  918. && (end - ptr == 3 || raw[ptr + 3] == '.')) {
  919. report(WIN32_BAD_NAME, id,
  920. JGitText.get().corruptObjectInvalidNamePrn);
  921. }
  922. break;
  923. }
  924. }
  925. private static boolean isInvalidOnWindows(byte c) {
  926. // Windows disallows "special" characters in a path component.
  927. switch (c) {
  928. case '"':
  929. case '*':
  930. case ':':
  931. case '<':
  932. case '>':
  933. case '?':
  934. case '\\':
  935. case '|':
  936. return true;
  937. }
  938. return 1 <= c && c <= 31;
  939. }
  940. private static boolean isGit(byte[] buf, int p) {
  941. return toLower(buf[p]) == 'g'
  942. && toLower(buf[p + 1]) == 'i'
  943. && toLower(buf[p + 2]) == 't';
  944. }
  945. private static boolean isGitTilde1(byte[] buf, int p, int end) {
  946. if (end - p != 5)
  947. return false;
  948. return toLower(buf[p]) == 'g' && toLower(buf[p + 1]) == 'i'
  949. && toLower(buf[p + 2]) == 't' && buf[p + 3] == '~'
  950. && buf[p + 4] == '1';
  951. }
  952. private static boolean isNormalizedGit(byte[] raw, int ptr, int end) {
  953. if (isGit(raw, ptr)) {
  954. int dots = 0;
  955. boolean space = false;
  956. int p = end - 1;
  957. for (; (ptr + 2) < p; p--) {
  958. if (raw[p] == '.')
  959. dots++;
  960. else if (raw[p] == ' ')
  961. space = true;
  962. else
  963. break;
  964. }
  965. return p == ptr + 2 && (dots == 1 || space);
  966. }
  967. return false;
  968. }
  969. private boolean match(byte[] b, byte[] src) {
  970. int r = RawParseUtils.match(b, bufPtr.value, src);
  971. if (r < 0) {
  972. return false;
  973. }
  974. bufPtr.value = r;
  975. return true;
  976. }
  977. private static char toLower(byte b) {
  978. if ('A' <= b && b <= 'Z')
  979. return (char) (b + ('a' - 'A'));
  980. return (char) b;
  981. }
  982. private static boolean isPositiveDigit(byte b) {
  983. return '1' <= b && b <= '9';
  984. }
  985. /**
  986. * Check a blob for errors.
  987. *
  988. * @param raw
  989. * the blob data. The array is never modified.
  990. * @throws CorruptObjectException
  991. * if any error was detected.
  992. */
  993. public void checkBlob(final byte[] raw) throws CorruptObjectException {
  994. // We can always assume the blob is valid.
  995. }
  996. private String normalize(byte[] raw, int ptr, int end) {
  997. String n = RawParseUtils.decode(raw, ptr, end).toLowerCase(Locale.US);
  998. return macosx ? Normalizer.normalize(n, Normalizer.Form.NFC) : n;
  999. }
  1000. }