You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

UnpackedObject.java 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. /*
  2. * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com>
  3. * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org>
  4. * Copyright (C) 2010, Google Inc.
  5. * and other copyright owners as documented in the project's IP log.
  6. *
  7. * This program and the accompanying materials are made available
  8. * under the terms of the Eclipse Distribution License v1.0 which
  9. * accompanies this distribution, is reproduced below, and is
  10. * available at http://www.eclipse.org/org/documents/edl-v10.php
  11. *
  12. * All rights reserved.
  13. *
  14. * Redistribution and use in source and binary forms, with or
  15. * without modification, are permitted provided that the following
  16. * conditions are met:
  17. *
  18. * - Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. *
  21. * - Redistributions in binary form must reproduce the above
  22. * copyright notice, this list of conditions and the following
  23. * disclaimer in the documentation and/or other materials provided
  24. * with the distribution.
  25. *
  26. * - Neither the name of the Eclipse Foundation, Inc. nor the
  27. * names of its contributors may be used to endorse or promote
  28. * products derived from this software without specific prior
  29. * written permission.
  30. *
  31. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  32. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  33. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  34. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  35. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  36. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  37. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  38. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  39. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  40. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  41. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  42. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  43. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  44. */
  45. package org.eclipse.jgit.storage.file;
  46. import java.io.BufferedInputStream;
  47. import java.io.ByteArrayInputStream;
  48. import java.io.File;
  49. import java.io.FileInputStream;
  50. import java.io.FileNotFoundException;
  51. import java.io.IOException;
  52. import java.io.InputStream;
  53. import java.util.zip.DataFormatException;
  54. import java.util.zip.Inflater;
  55. import java.util.zip.InflaterInputStream;
  56. import java.util.zip.ZipException;
  57. import org.eclipse.jgit.errors.CorruptObjectException;
  58. import org.eclipse.jgit.errors.LargeObjectException;
  59. import org.eclipse.jgit.errors.MissingObjectException;
  60. import org.eclipse.jgit.internal.JGitText;
  61. import org.eclipse.jgit.lib.AnyObjectId;
  62. import org.eclipse.jgit.lib.Constants;
  63. import org.eclipse.jgit.lib.InflaterCache;
  64. import org.eclipse.jgit.lib.ObjectId;
  65. import org.eclipse.jgit.lib.ObjectLoader;
  66. import org.eclipse.jgit.lib.ObjectStream;
  67. import org.eclipse.jgit.util.IO;
  68. import org.eclipse.jgit.util.MutableInteger;
  69. import org.eclipse.jgit.util.RawParseUtils;
  70. /**
  71. * Loose object loader. This class loads an object not stored in a pack.
  72. */
  73. public class UnpackedObject {
  74. private static final int BUFFER_SIZE = 8192;
  75. /**
  76. * Parse an object from the unpacked object format.
  77. *
  78. * @param raw
  79. * complete contents of the compressed object.
  80. * @param id
  81. * expected ObjectId of the object, used only for error reporting
  82. * in exceptions.
  83. * @return loader to read the inflated contents.
  84. * @throws IOException
  85. * the object cannot be parsed.
  86. */
  87. public static ObjectLoader parse(byte[] raw, AnyObjectId id)
  88. throws IOException {
  89. WindowCursor wc = new WindowCursor(null);
  90. try {
  91. return open(new ByteArrayInputStream(raw), null, id, wc);
  92. } finally {
  93. wc.release();
  94. }
  95. }
  96. static ObjectLoader open(InputStream in, File path, AnyObjectId id,
  97. WindowCursor wc) throws IOException {
  98. try {
  99. in = buffer(in);
  100. in.mark(20);
  101. final byte[] hdr = new byte[64];
  102. IO.readFully(in, hdr, 0, 2);
  103. if (isStandardFormat(hdr)) {
  104. in.reset();
  105. Inflater inf = wc.inflater();
  106. InputStream zIn = inflate(in, inf);
  107. int avail = readSome(zIn, hdr, 0, 64);
  108. if (avail < 5)
  109. throw new CorruptObjectException(id,
  110. JGitText.get().corruptObjectNoHeader);
  111. final MutableInteger p = new MutableInteger();
  112. int type = Constants.decodeTypeString(id, hdr, (byte) ' ', p);
  113. long size = RawParseUtils.parseLongBase10(hdr, p.value, p);
  114. if (size < 0)
  115. throw new CorruptObjectException(id,
  116. JGitText.get().corruptObjectNegativeSize);
  117. if (hdr[p.value++] != 0)
  118. throw new CorruptObjectException(id,
  119. JGitText.get().corruptObjectGarbageAfterSize);
  120. if (path == null && Integer.MAX_VALUE < size) {
  121. LargeObjectException.ExceedsByteArrayLimit e;
  122. e = new LargeObjectException.ExceedsByteArrayLimit();
  123. e.setObjectId(id);
  124. throw e;
  125. }
  126. if (size < wc.getStreamFileThreshold() || path == null) {
  127. byte[] data = new byte[(int) size];
  128. int n = avail - p.value;
  129. if (n > 0)
  130. System.arraycopy(hdr, p.value, data, 0, n);
  131. IO.readFully(zIn, data, n, data.length - n);
  132. checkValidEndOfStream(in, inf, id, hdr);
  133. return new ObjectLoader.SmallObject(type, data);
  134. }
  135. return new LargeObject(type, size, path, id, wc.db);
  136. } else {
  137. readSome(in, hdr, 2, 18);
  138. int c = hdr[0] & 0xff;
  139. int type = (c >> 4) & 7;
  140. long size = c & 15;
  141. int shift = 4;
  142. int p = 1;
  143. while ((c & 0x80) != 0) {
  144. c = hdr[p++] & 0xff;
  145. size += ((long) (c & 0x7f)) << shift;
  146. shift += 7;
  147. }
  148. switch (type) {
  149. case Constants.OBJ_COMMIT:
  150. case Constants.OBJ_TREE:
  151. case Constants.OBJ_BLOB:
  152. case Constants.OBJ_TAG:
  153. // Acceptable types for a loose object.
  154. break;
  155. default:
  156. throw new CorruptObjectException(id,
  157. JGitText.get().corruptObjectInvalidType);
  158. }
  159. if (path == null && Integer.MAX_VALUE < size) {
  160. LargeObjectException.ExceedsByteArrayLimit e;
  161. e = new LargeObjectException.ExceedsByteArrayLimit();
  162. e.setObjectId(id);
  163. throw e;
  164. }
  165. if (size < wc.getStreamFileThreshold() || path == null) {
  166. in.reset();
  167. IO.skipFully(in, p);
  168. Inflater inf = wc.inflater();
  169. InputStream zIn = inflate(in, inf);
  170. byte[] data = new byte[(int) size];
  171. IO.readFully(zIn, data, 0, data.length);
  172. checkValidEndOfStream(in, inf, id, hdr);
  173. return new ObjectLoader.SmallObject(type, data);
  174. }
  175. return new LargeObject(type, size, path, id, wc.db);
  176. }
  177. } catch (ZipException badStream) {
  178. throw new CorruptObjectException(id,
  179. JGitText.get().corruptObjectBadStream);
  180. }
  181. }
  182. static long getSize(InputStream in, AnyObjectId id, WindowCursor wc)
  183. throws IOException {
  184. try {
  185. in = buffer(in);
  186. in.mark(20);
  187. final byte[] hdr = new byte[64];
  188. IO.readFully(in, hdr, 0, 2);
  189. if (isStandardFormat(hdr)) {
  190. in.reset();
  191. Inflater inf = wc.inflater();
  192. InputStream zIn = inflate(in, inf);
  193. int avail = readSome(zIn, hdr, 0, 64);
  194. if (avail < 5)
  195. throw new CorruptObjectException(id,
  196. JGitText.get().corruptObjectNoHeader);
  197. final MutableInteger p = new MutableInteger();
  198. Constants.decodeTypeString(id, hdr, (byte) ' ', p);
  199. long size = RawParseUtils.parseLongBase10(hdr, p.value, p);
  200. if (size < 0)
  201. throw new CorruptObjectException(id,
  202. JGitText.get().corruptObjectNegativeSize);
  203. return size;
  204. } else {
  205. readSome(in, hdr, 2, 18);
  206. int c = hdr[0] & 0xff;
  207. long size = c & 15;
  208. int shift = 4;
  209. int p = 1;
  210. while ((c & 0x80) != 0) {
  211. c = hdr[p++] & 0xff;
  212. size += ((long) (c & 0x7f)) << shift;
  213. shift += 7;
  214. }
  215. return size;
  216. }
  217. } catch (ZipException badStream) {
  218. throw new CorruptObjectException(id,
  219. JGitText.get().corruptObjectBadStream);
  220. }
  221. }
  222. private static void checkValidEndOfStream(InputStream in, Inflater inf,
  223. AnyObjectId id, final byte[] buf) throws IOException,
  224. CorruptObjectException {
  225. for (;;) {
  226. int r;
  227. try {
  228. r = inf.inflate(buf);
  229. } catch (DataFormatException e) {
  230. throw new CorruptObjectException(id,
  231. JGitText.get().corruptObjectBadStream);
  232. }
  233. if (r != 0)
  234. throw new CorruptObjectException(id,
  235. JGitText.get().corruptObjectIncorrectLength);
  236. if (inf.finished()) {
  237. if (inf.getRemaining() != 0 || in.read() != -1)
  238. throw new CorruptObjectException(id,
  239. JGitText.get().corruptObjectBadStream);
  240. break;
  241. }
  242. if (!inf.needsInput())
  243. throw new CorruptObjectException(id,
  244. JGitText.get().corruptObjectBadStream);
  245. r = in.read(buf);
  246. if (r <= 0)
  247. throw new CorruptObjectException(id,
  248. JGitText.get().corruptObjectBadStream);
  249. inf.setInput(buf, 0, r);
  250. }
  251. }
  252. private static boolean isStandardFormat(final byte[] hdr) {
  253. /*
  254. * We must determine if the buffer contains the standard
  255. * zlib-deflated stream or the experimental format based
  256. * on the in-pack object format. Compare the header byte
  257. * for each format:
  258. *
  259. * RFC1950 zlib w/ deflate : 0www1000 : 0 <= www <= 7
  260. * Experimental pack-based : Stttssss : ttt = 1,2,3,4
  261. *
  262. * If bit 7 is clear and bits 0-3 equal 8, the buffer MUST be
  263. * in standard loose-object format, UNLESS it is a Git-pack
  264. * format object *exactly* 8 bytes in size when inflated.
  265. *
  266. * However, RFC1950 also specifies that the 1st 16-bit word
  267. * must be divisible by 31 - this checksum tells us our buffer
  268. * is in the standard format, giving a false positive only if
  269. * the 1st word of the Git-pack format object happens to be
  270. * divisible by 31, ie:
  271. * ((byte0 * 256) + byte1) % 31 = 0
  272. * => 0ttt10000www1000 % 31 = 0
  273. *
  274. * As it happens, this case can only arise for www=3 & ttt=1
  275. * - ie, a Commit object, which would have to be 8 bytes in
  276. * size. As no Commit can be that small, we find that the
  277. * combination of these two criteria (bitmask & checksum)
  278. * can always correctly determine the buffer format.
  279. */
  280. final int fb = hdr[0] & 0xff;
  281. return (fb & 0x8f) == 0x08 && (((fb << 8) | hdr[1] & 0xff) % 31) == 0;
  282. }
  283. private static InputStream inflate(final InputStream in, final long size,
  284. final ObjectId id) {
  285. final Inflater inf = InflaterCache.get();
  286. return new InflaterInputStream(in, inf) {
  287. private long remaining = size;
  288. @Override
  289. public int read(byte[] b, int off, int cnt) throws IOException {
  290. try {
  291. int r = super.read(b, off, cnt);
  292. if (r > 0)
  293. remaining -= r;
  294. return r;
  295. } catch (ZipException badStream) {
  296. throw new CorruptObjectException(id,
  297. JGitText.get().corruptObjectBadStream);
  298. }
  299. }
  300. @Override
  301. public void close() throws IOException {
  302. try {
  303. if (remaining <= 0)
  304. checkValidEndOfStream(in, inf, id, new byte[64]);
  305. } finally {
  306. InflaterCache.release(inf);
  307. super.close();
  308. }
  309. }
  310. };
  311. }
  312. private static InflaterInputStream inflate(InputStream in, Inflater inf) {
  313. return new InflaterInputStream(in, inf, BUFFER_SIZE);
  314. }
  315. private static BufferedInputStream buffer(InputStream in) {
  316. return new BufferedInputStream(in, BUFFER_SIZE);
  317. }
  318. private static int readSome(InputStream in, final byte[] hdr, int off,
  319. int cnt) throws IOException {
  320. int avail = 0;
  321. while (0 < cnt) {
  322. int n = in.read(hdr, off, cnt);
  323. if (n < 0)
  324. break;
  325. avail += n;
  326. off += n;
  327. cnt -= n;
  328. }
  329. return avail;
  330. }
  331. private static final class LargeObject extends ObjectLoader {
  332. private final int type;
  333. private final long size;
  334. private final File path;
  335. private final ObjectId id;
  336. private final FileObjectDatabase source;
  337. private LargeObject(int type, long size, File path, AnyObjectId id,
  338. FileObjectDatabase db) {
  339. this.type = type;
  340. this.size = size;
  341. this.path = path;
  342. this.id = id.copy();
  343. this.source = db;
  344. }
  345. @Override
  346. public int getType() {
  347. return type;
  348. }
  349. @Override
  350. public long getSize() {
  351. return size;
  352. }
  353. @Override
  354. public boolean isLarge() {
  355. return true;
  356. }
  357. @Override
  358. public byte[] getCachedBytes() throws LargeObjectException {
  359. throw new LargeObjectException(id);
  360. }
  361. @Override
  362. public ObjectStream openStream() throws MissingObjectException,
  363. IOException {
  364. InputStream in;
  365. try {
  366. in = buffer(new FileInputStream(path));
  367. } catch (FileNotFoundException gone) {
  368. // If the loose file no longer exists, it may have been
  369. // moved into a pack file in the mean time. Try again
  370. // to locate the object.
  371. //
  372. return source.open(id, type).openStream();
  373. }
  374. boolean ok = false;
  375. try {
  376. final byte[] hdr = new byte[64];
  377. in.mark(20);
  378. IO.readFully(in, hdr, 0, 2);
  379. if (isStandardFormat(hdr)) {
  380. in.reset();
  381. in = buffer(inflate(in, size, id));
  382. while (0 < in.read())
  383. continue;
  384. } else {
  385. readSome(in, hdr, 2, 18);
  386. int c = hdr[0] & 0xff;
  387. int p = 1;
  388. while ((c & 0x80) != 0)
  389. c = hdr[p++] & 0xff;
  390. in.reset();
  391. IO.skipFully(in, p);
  392. in = buffer(inflate(in, size, id));
  393. }
  394. ok = true;
  395. return new ObjectStream.Filter(type, size, in);
  396. } finally {
  397. if (!ok)
  398. in.close();
  399. }
  400. }
  401. }
  402. }