You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

DfsPackFile.java 36KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247
  1. /*
  2. * Copyright (C) 2008-2011, Google Inc.
  3. * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com>
  4. * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org>
  5. * and other copyright owners as documented in the project's IP log.
  6. *
  7. * This program and the accompanying materials are made available
  8. * under the terms of the Eclipse Distribution License v1.0 which
  9. * accompanies this distribution, is reproduced below, and is
  10. * available at http://www.eclipse.org/org/documents/edl-v10.php
  11. *
  12. * All rights reserved.
  13. *
  14. * Redistribution and use in source and binary forms, with or
  15. * without modification, are permitted provided that the following
  16. * conditions are met:
  17. *
  18. * - Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. *
  21. * - Redistributions in binary form must reproduce the above
  22. * copyright notice, this list of conditions and the following
  23. * disclaimer in the documentation and/or other materials provided
  24. * with the distribution.
  25. *
  26. * - Neither the name of the Eclipse Foundation, Inc. nor the
  27. * names of its contributors may be used to endorse or promote
  28. * products derived from this software without specific prior
  29. * written permission.
  30. *
  31. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  32. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  33. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  34. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  35. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  36. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  37. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  38. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  39. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  40. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  41. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  42. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  43. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  44. */
  45. package org.eclipse.jgit.internal.storage.dfs;
  46. import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
  47. import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
  48. import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
  49. import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
  50. import java.io.BufferedInputStream;
  51. import java.io.EOFException;
  52. import java.io.IOException;
  53. import java.io.InputStream;
  54. import java.nio.ByteBuffer;
  55. import java.nio.channels.Channels;
  56. import java.text.MessageFormat;
  57. import java.util.Set;
  58. import java.util.zip.CRC32;
  59. import java.util.zip.DataFormatException;
  60. import java.util.zip.Inflater;
  61. import org.eclipse.jgit.errors.CorruptObjectException;
  62. import org.eclipse.jgit.errors.LargeObjectException;
  63. import org.eclipse.jgit.errors.MissingObjectException;
  64. import org.eclipse.jgit.errors.PackInvalidException;
  65. import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
  66. import org.eclipse.jgit.internal.JGitText;
  67. import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
  68. import org.eclipse.jgit.internal.storage.file.PackIndex;
  69. import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
  70. import org.eclipse.jgit.internal.storage.pack.BinaryDelta;
  71. import org.eclipse.jgit.internal.storage.pack.PackExt;
  72. import org.eclipse.jgit.internal.storage.pack.PackOutputStream;
  73. import org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation;
  74. import org.eclipse.jgit.lib.AbbreviatedObjectId;
  75. import org.eclipse.jgit.lib.AnyObjectId;
  76. import org.eclipse.jgit.lib.Constants;
  77. import org.eclipse.jgit.lib.ObjectId;
  78. import org.eclipse.jgit.lib.ObjectLoader;
  79. import org.eclipse.jgit.lib.Repository;
  80. import org.eclipse.jgit.util.LongList;
  81. /**
  82. * A Git version 2 pack file representation. A pack file contains Git objects in
  83. * delta packed format yielding high compression of lots of objects where some
  84. * objects are similar.
  85. */
  86. public final class DfsPackFile {
  87. /**
  88. * File offset used to cache {@link #index} in {@link DfsBlockCache}.
  89. * <p>
  90. * To better manage memory, the forward index is stored as a single block in
  91. * the block cache under this file position. A negative value is used
  92. * because it cannot occur in a normal pack file, and it is less likely to
  93. * collide with a valid data block from the file as the high bits will all
  94. * be set when treated as an unsigned long by the cache code.
  95. */
  96. private static final long POS_INDEX = -1;
  97. /** Offset used to cache {@link #reverseIndex}. See {@link #POS_INDEX}. */
  98. private static final long POS_REVERSE_INDEX = -2;
  99. /** Offset used to cache {@link #bitmapIndex}. See {@link #POS_INDEX}. */
  100. private static final long POS_BITMAP_INDEX = -3;
  101. /** Cache that owns this pack file and its data. */
  102. private final DfsBlockCache cache;
  103. /** Description of the pack file's storage. */
  104. private final DfsPackDescription packDesc;
  105. /** Unique identity of this pack while in-memory. */
  106. final DfsPackKey key;
  107. /**
  108. * Total number of bytes in this pack file.
  109. * <p>
  110. * This field initializes to -1 and gets populated when a block is loaded.
  111. */
  112. volatile long length;
  113. /**
  114. * Preferred alignment for loading blocks from the backing file.
  115. * <p>
  116. * It is initialized to 0 and filled in on the first read made from the
  117. * file. Block sizes may be odd, e.g. 4091, caused by the underlying DFS
  118. * storing 4091 user bytes and 5 bytes block metadata into a lower level
  119. * 4096 byte block on disk.
  120. */
  121. private volatile int blockSize;
  122. /** True once corruption has been detected that cannot be worked around. */
  123. private volatile boolean invalid;
  124. /** Exception that caused the packfile to be flagged as invalid */
  125. private volatile Exception invalidatingCause;
  126. /**
  127. * Lock for initialization of {@link #index} and {@link #corruptObjects}.
  128. * <p>
  129. * This lock ensures only one thread can perform the initialization work.
  130. */
  131. private final Object initLock = new Object();
  132. /** Index mapping {@link ObjectId} to position within the pack stream. */
  133. private volatile DfsBlockCache.Ref<PackIndex> index;
  134. /** Reverse version of {@link #index} mapping position to {@link ObjectId}. */
  135. private volatile DfsBlockCache.Ref<PackReverseIndex> reverseIndex;
  136. /** Index of compressed bitmap mapping entire object graph. */
  137. private volatile DfsBlockCache.Ref<PackBitmapIndex> bitmapIndex;
  138. /**
  139. * Objects we have tried to read, and discovered to be corrupt.
  140. * <p>
  141. * The list is allocated after the first corruption is found, and filled in
  142. * as more entries are discovered. Typically this list is never used, as
  143. * pack files do not usually contain corrupt objects.
  144. */
  145. private volatile LongList corruptObjects;
  146. /**
  147. * Construct a reader for an existing, packfile.
  148. *
  149. * @param cache
  150. * cache that owns the pack data.
  151. * @param desc
  152. * description of the pack within the DFS.
  153. * @param key
  154. * interned key used to identify blocks in the block cache.
  155. */
  156. DfsPackFile(DfsBlockCache cache, DfsPackDescription desc, DfsPackKey key) {
  157. this.cache = cache;
  158. this.packDesc = desc;
  159. this.key = key;
  160. length = desc.getFileSize(PACK);
  161. if (length <= 0)
  162. length = -1;
  163. }
  164. /** @return description that was originally used to configure this pack file. */
  165. public DfsPackDescription getPackDescription() {
  166. return packDesc;
  167. }
  168. /**
  169. * @return whether the pack index file is loaded and cached in memory.
  170. */
  171. public boolean isIndexLoaded() {
  172. DfsBlockCache.Ref<PackIndex> idxref = index;
  173. return idxref != null && idxref.has();
  174. }
  175. /** @return bytes cached in memory for this pack, excluding the index. */
  176. public long getCachedSize() {
  177. return key.cachedSize.get();
  178. }
  179. String getPackName() {
  180. return packDesc.getFileName(PACK);
  181. }
  182. void setBlockSize(int newSize) {
  183. blockSize = newSize;
  184. }
  185. void setPackIndex(PackIndex idx) {
  186. long objCnt = idx.getObjectCount();
  187. int recSize = Constants.OBJECT_ID_LENGTH + 8;
  188. int sz = (int) Math.min(objCnt * recSize, Integer.MAX_VALUE);
  189. index = cache.put(key, POS_INDEX, sz, idx);
  190. }
  191. /**
  192. * Get the PackIndex for this PackFile.
  193. *
  194. * @param ctx
  195. * reader context to support reading from the backing store if
  196. * the index is not already loaded in memory.
  197. * @return the PackIndex.
  198. * @throws IOException
  199. * the pack index is not available, or is corrupt.
  200. */
  201. public PackIndex getPackIndex(DfsReader ctx) throws IOException {
  202. return idx(ctx);
  203. }
  204. private PackIndex idx(DfsReader ctx) throws IOException {
  205. DfsBlockCache.Ref<PackIndex> idxref = index;
  206. if (idxref != null) {
  207. PackIndex idx = idxref.get();
  208. if (idx != null)
  209. return idx;
  210. }
  211. if (invalid) {
  212. throw new PackInvalidException(getPackName(), invalidatingCause);
  213. }
  214. Repository.getGlobalListenerList()
  215. .dispatch(new BeforeDfsPackIndexLoadedEvent(this));
  216. synchronized (initLock) {
  217. idxref = index;
  218. if (idxref != null) {
  219. PackIndex idx = idxref.get();
  220. if (idx != null)
  221. return idx;
  222. }
  223. PackIndex idx;
  224. try {
  225. ctx.stats.readIdx++;
  226. long start = System.nanoTime();
  227. ReadableChannel rc = ctx.db.openFile(packDesc, INDEX);
  228. try {
  229. InputStream in = Channels.newInputStream(rc);
  230. int wantSize = 8192;
  231. int bs = rc.blockSize();
  232. if (0 < bs && bs < wantSize)
  233. bs = (wantSize / bs) * bs;
  234. else if (bs <= 0)
  235. bs = wantSize;
  236. idx = PackIndex.read(new BufferedInputStream(in, bs));
  237. ctx.stats.readIdxBytes += rc.position();
  238. } finally {
  239. rc.close();
  240. ctx.stats.readIdxMicros += elapsedMicros(start);
  241. }
  242. } catch (EOFException e) {
  243. invalid = true;
  244. invalidatingCause = e;
  245. IOException e2 = new IOException(MessageFormat.format(
  246. DfsText.get().shortReadOfIndex,
  247. packDesc.getFileName(INDEX)));
  248. e2.initCause(e);
  249. throw e2;
  250. } catch (IOException e) {
  251. invalid = true;
  252. invalidatingCause = e;
  253. IOException e2 = new IOException(MessageFormat.format(
  254. DfsText.get().cannotReadIndex,
  255. packDesc.getFileName(INDEX)));
  256. e2.initCause(e);
  257. throw e2;
  258. }
  259. setPackIndex(idx);
  260. return idx;
  261. }
  262. }
  263. private static long elapsedMicros(long start) {
  264. return (System.nanoTime() - start) / 1000L;
  265. }
  266. final boolean isGarbage() {
  267. return packDesc.getPackSource() == UNREACHABLE_GARBAGE;
  268. }
  269. PackBitmapIndex getBitmapIndex(DfsReader ctx) throws IOException {
  270. if (invalid || isGarbage())
  271. return null;
  272. DfsBlockCache.Ref<PackBitmapIndex> idxref = bitmapIndex;
  273. if (idxref != null) {
  274. PackBitmapIndex idx = idxref.get();
  275. if (idx != null)
  276. return idx;
  277. }
  278. if (!packDesc.hasFileExt(PackExt.BITMAP_INDEX))
  279. return null;
  280. synchronized (initLock) {
  281. idxref = bitmapIndex;
  282. if (idxref != null) {
  283. PackBitmapIndex idx = idxref.get();
  284. if (idx != null)
  285. return idx;
  286. }
  287. long size;
  288. PackBitmapIndex idx;
  289. try {
  290. ctx.stats.readBitmap++;
  291. long start = System.nanoTime();
  292. ReadableChannel rc = ctx.db.openFile(packDesc, BITMAP_INDEX);
  293. try {
  294. InputStream in = Channels.newInputStream(rc);
  295. int wantSize = 8192;
  296. int bs = rc.blockSize();
  297. if (0 < bs && bs < wantSize)
  298. bs = (wantSize / bs) * bs;
  299. else if (bs <= 0)
  300. bs = wantSize;
  301. in = new BufferedInputStream(in, bs);
  302. idx = PackBitmapIndex.read(
  303. in, idx(ctx), getReverseIdx(ctx));
  304. } finally {
  305. size = rc.position();
  306. rc.close();
  307. ctx.stats.readIdxBytes += size;
  308. ctx.stats.readIdxMicros += elapsedMicros(start);
  309. }
  310. } catch (EOFException e) {
  311. IOException e2 = new IOException(MessageFormat.format(
  312. DfsText.get().shortReadOfIndex,
  313. packDesc.getFileName(BITMAP_INDEX)));
  314. e2.initCause(e);
  315. throw e2;
  316. } catch (IOException e) {
  317. IOException e2 = new IOException(MessageFormat.format(
  318. DfsText.get().cannotReadIndex,
  319. packDesc.getFileName(BITMAP_INDEX)));
  320. e2.initCause(e);
  321. throw e2;
  322. }
  323. bitmapIndex = cache.put(key, POS_BITMAP_INDEX,
  324. (int) Math.min(size, Integer.MAX_VALUE), idx);
  325. return idx;
  326. }
  327. }
  328. PackReverseIndex getReverseIdx(DfsReader ctx) throws IOException {
  329. DfsBlockCache.Ref<PackReverseIndex> revref = reverseIndex;
  330. if (revref != null) {
  331. PackReverseIndex revidx = revref.get();
  332. if (revidx != null)
  333. return revidx;
  334. }
  335. synchronized (initLock) {
  336. revref = reverseIndex;
  337. if (revref != null) {
  338. PackReverseIndex revidx = revref.get();
  339. if (revidx != null)
  340. return revidx;
  341. }
  342. PackIndex idx = idx(ctx);
  343. PackReverseIndex revidx = new PackReverseIndex(idx);
  344. int sz = (int) Math.min(
  345. idx.getObjectCount() * 8, Integer.MAX_VALUE);
  346. reverseIndex = cache.put(key, POS_REVERSE_INDEX, sz, revidx);
  347. return revidx;
  348. }
  349. }
  350. /**
  351. * Check if an object is stored within this pack.
  352. *
  353. * @param ctx
  354. * reader context to support reading from the backing store if
  355. * the index is not already loaded in memory.
  356. * @param id
  357. * object to be located.
  358. * @return true if the object exists in this pack; false if it does not.
  359. * @throws IOException
  360. * the pack index is not available, or is corrupt.
  361. */
  362. public boolean hasObject(DfsReader ctx, AnyObjectId id) throws IOException {
  363. final long offset = idx(ctx).findOffset(id);
  364. return 0 < offset && !isCorrupt(offset);
  365. }
  366. /**
  367. * Get an object from this pack.
  368. *
  369. * @param ctx
  370. * temporary working space associated with the calling thread.
  371. * @param id
  372. * the object to obtain from the pack. Must not be null.
  373. * @return the object loader for the requested object if it is contained in
  374. * this pack; null if the object was not found.
  375. * @throws IOException
  376. * the pack file or the index could not be read.
  377. */
  378. ObjectLoader get(DfsReader ctx, AnyObjectId id)
  379. throws IOException {
  380. long offset = idx(ctx).findOffset(id);
  381. return 0 < offset && !isCorrupt(offset) ? load(ctx, offset) : null;
  382. }
  383. long findOffset(DfsReader ctx, AnyObjectId id) throws IOException {
  384. return idx(ctx).findOffset(id);
  385. }
  386. void resolve(DfsReader ctx, Set<ObjectId> matches, AbbreviatedObjectId id,
  387. int matchLimit) throws IOException {
  388. idx(ctx).resolve(matches, id, matchLimit);
  389. }
  390. /** Release all memory used by this DfsPackFile instance. */
  391. public void close() {
  392. cache.remove(this);
  393. index = null;
  394. reverseIndex = null;
  395. }
  396. /**
  397. * Obtain the total number of objects available in this pack. This method
  398. * relies on pack index, giving number of effectively available objects.
  399. *
  400. * @param ctx
  401. * current reader for the calling thread.
  402. * @return number of objects in index of this pack, likewise in this pack
  403. * @throws IOException
  404. * the index file cannot be loaded into memory.
  405. */
  406. long getObjectCount(DfsReader ctx) throws IOException {
  407. return idx(ctx).getObjectCount();
  408. }
  409. private byte[] decompress(long position, int sz, DfsReader ctx)
  410. throws IOException, DataFormatException {
  411. byte[] dstbuf;
  412. try {
  413. dstbuf = new byte[sz];
  414. } catch (OutOfMemoryError noMemory) {
  415. // The size may be larger than our heap allows, return null to
  416. // let the caller know allocation isn't possible and it should
  417. // use the large object streaming approach instead.
  418. //
  419. // For example, this can occur when sz is 640 MB, and JRE
  420. // maximum heap size is only 256 MB. Even if the JRE has
  421. // 200 MB free, it cannot allocate a 640 MB byte array.
  422. return null;
  423. }
  424. if (ctx.inflate(this, position, dstbuf, false) != sz)
  425. throw new EOFException(MessageFormat.format(
  426. JGitText.get().shortCompressedStreamAt,
  427. Long.valueOf(position)));
  428. return dstbuf;
  429. }
  430. void copyPackAsIs(PackOutputStream out, DfsReader ctx)
  431. throws IOException {
  432. // If the length hasn't been determined yet, pin to set it.
  433. if (length == -1) {
  434. ctx.pin(this, 0);
  435. ctx.unpin();
  436. }
  437. if (cache.shouldCopyThroughCache(length))
  438. copyPackThroughCache(out, ctx);
  439. else
  440. copyPackBypassCache(out, ctx);
  441. }
  442. private void copyPackThroughCache(PackOutputStream out, DfsReader ctx)
  443. throws IOException {
  444. long position = 12;
  445. long remaining = length - (12 + 20);
  446. while (0 < remaining) {
  447. DfsBlock b = cache.getOrLoad(this, position, ctx);
  448. int ptr = (int) (position - b.start);
  449. int n = (int) Math.min(b.size() - ptr, remaining);
  450. b.write(out, position, n);
  451. position += n;
  452. remaining -= n;
  453. }
  454. }
  455. private long copyPackBypassCache(PackOutputStream out, DfsReader ctx)
  456. throws IOException {
  457. try (ReadableChannel rc = ctx.db.openFile(packDesc, PACK)) {
  458. ByteBuffer buf = newCopyBuffer(out, rc);
  459. if (ctx.getOptions().getStreamPackBufferSize() > 0)
  460. rc.setReadAheadBytes(ctx.getOptions().getStreamPackBufferSize());
  461. long position = 12;
  462. long remaining = length - (12 + 20);
  463. boolean packHeadSkipped = false;
  464. while (0 < remaining) {
  465. DfsBlock b = cache.get(key, alignToBlock(position));
  466. if (b != null) {
  467. int ptr = (int) (position - b.start);
  468. int n = (int) Math.min(b.size() - ptr, remaining);
  469. b.write(out, position, n);
  470. position += n;
  471. remaining -= n;
  472. rc.position(position);
  473. packHeadSkipped = true;
  474. continue;
  475. }
  476. buf.position(0);
  477. int n = read(rc, buf);
  478. if (n <= 0)
  479. throw packfileIsTruncated();
  480. else if (n > remaining)
  481. n = (int) remaining;
  482. if (!packHeadSkipped) {
  483. // Need skip the 'PACK' header for the first read
  484. out.write(buf.array(), 12, n - 12);
  485. packHeadSkipped = true;
  486. } else {
  487. out.write(buf.array(), 0, n);
  488. }
  489. position += n;
  490. remaining -= n;
  491. }
  492. return position;
  493. }
  494. }
  495. private ByteBuffer newCopyBuffer(PackOutputStream out, ReadableChannel rc) {
  496. int bs = blockSize(rc);
  497. byte[] copyBuf = out.getCopyBuffer();
  498. if (bs > copyBuf.length)
  499. copyBuf = new byte[bs];
  500. return ByteBuffer.wrap(copyBuf, 0, bs);
  501. }
  502. void copyAsIs(PackOutputStream out, DfsObjectToPack src,
  503. boolean validate, DfsReader ctx) throws IOException,
  504. StoredObjectRepresentationNotAvailableException {
  505. final CRC32 crc1 = validate ? new CRC32() : null;
  506. final CRC32 crc2 = validate ? new CRC32() : null;
  507. final byte[] buf = out.getCopyBuffer();
  508. // Rip apart the header so we can discover the size.
  509. //
  510. try {
  511. readFully(src.offset, buf, 0, 20, ctx);
  512. } catch (IOException ioError) {
  513. StoredObjectRepresentationNotAvailableException gone;
  514. gone = new StoredObjectRepresentationNotAvailableException(src);
  515. gone.initCause(ioError);
  516. throw gone;
  517. }
  518. int c = buf[0] & 0xff;
  519. final int typeCode = (c >> 4) & 7;
  520. long inflatedLength = c & 15;
  521. int shift = 4;
  522. int headerCnt = 1;
  523. while ((c & 0x80) != 0) {
  524. c = buf[headerCnt++] & 0xff;
  525. inflatedLength += ((long) (c & 0x7f)) << shift;
  526. shift += 7;
  527. }
  528. if (typeCode == Constants.OBJ_OFS_DELTA) {
  529. do {
  530. c = buf[headerCnt++] & 0xff;
  531. } while ((c & 128) != 0);
  532. if (validate) {
  533. assert(crc1 != null && crc2 != null);
  534. crc1.update(buf, 0, headerCnt);
  535. crc2.update(buf, 0, headerCnt);
  536. }
  537. } else if (typeCode == Constants.OBJ_REF_DELTA) {
  538. if (validate) {
  539. assert(crc1 != null && crc2 != null);
  540. crc1.update(buf, 0, headerCnt);
  541. crc2.update(buf, 0, headerCnt);
  542. }
  543. readFully(src.offset + headerCnt, buf, 0, 20, ctx);
  544. if (validate) {
  545. assert(crc1 != null && crc2 != null);
  546. crc1.update(buf, 0, 20);
  547. crc2.update(buf, 0, 20);
  548. }
  549. headerCnt += 20;
  550. } else if (validate) {
  551. assert(crc1 != null && crc2 != null);
  552. crc1.update(buf, 0, headerCnt);
  553. crc2.update(buf, 0, headerCnt);
  554. }
  555. final long dataOffset = src.offset + headerCnt;
  556. final long dataLength = src.length;
  557. final long expectedCRC;
  558. final DfsBlock quickCopy;
  559. // Verify the object isn't corrupt before sending. If it is,
  560. // we report it missing instead.
  561. //
  562. try {
  563. quickCopy = ctx.quickCopy(this, dataOffset, dataLength);
  564. if (validate && idx(ctx).hasCRC32Support()) {
  565. assert(crc1 != null);
  566. // Index has the CRC32 code cached, validate the object.
  567. //
  568. expectedCRC = idx(ctx).findCRC32(src);
  569. if (quickCopy != null) {
  570. quickCopy.crc32(crc1, dataOffset, (int) dataLength);
  571. } else {
  572. long pos = dataOffset;
  573. long cnt = dataLength;
  574. while (cnt > 0) {
  575. final int n = (int) Math.min(cnt, buf.length);
  576. readFully(pos, buf, 0, n, ctx);
  577. crc1.update(buf, 0, n);
  578. pos += n;
  579. cnt -= n;
  580. }
  581. }
  582. if (crc1.getValue() != expectedCRC) {
  583. setCorrupt(src.offset);
  584. throw new CorruptObjectException(MessageFormat.format(
  585. JGitText.get().objectAtHasBadZlibStream,
  586. Long.valueOf(src.offset), getPackName()));
  587. }
  588. } else if (validate) {
  589. assert(crc1 != null);
  590. // We don't have a CRC32 code in the index, so compute it
  591. // now while inflating the raw data to get zlib to tell us
  592. // whether or not the data is safe.
  593. //
  594. Inflater inf = ctx.inflater();
  595. byte[] tmp = new byte[1024];
  596. if (quickCopy != null) {
  597. quickCopy.check(inf, tmp, dataOffset, (int) dataLength);
  598. } else {
  599. long pos = dataOffset;
  600. long cnt = dataLength;
  601. while (cnt > 0) {
  602. final int n = (int) Math.min(cnt, buf.length);
  603. readFully(pos, buf, 0, n, ctx);
  604. crc1.update(buf, 0, n);
  605. inf.setInput(buf, 0, n);
  606. while (inf.inflate(tmp, 0, tmp.length) > 0)
  607. continue;
  608. pos += n;
  609. cnt -= n;
  610. }
  611. }
  612. if (!inf.finished() || inf.getBytesRead() != dataLength) {
  613. setCorrupt(src.offset);
  614. throw new EOFException(MessageFormat.format(
  615. JGitText.get().shortCompressedStreamAt,
  616. Long.valueOf(src.offset)));
  617. }
  618. expectedCRC = crc1.getValue();
  619. } else {
  620. expectedCRC = -1;
  621. }
  622. } catch (DataFormatException dataFormat) {
  623. setCorrupt(src.offset);
  624. CorruptObjectException corruptObject = new CorruptObjectException(
  625. MessageFormat.format(
  626. JGitText.get().objectAtHasBadZlibStream,
  627. Long.valueOf(src.offset), getPackName()));
  628. corruptObject.initCause(dataFormat);
  629. StoredObjectRepresentationNotAvailableException gone;
  630. gone = new StoredObjectRepresentationNotAvailableException(src);
  631. gone.initCause(corruptObject);
  632. throw gone;
  633. } catch (IOException ioError) {
  634. StoredObjectRepresentationNotAvailableException gone;
  635. gone = new StoredObjectRepresentationNotAvailableException(src);
  636. gone.initCause(ioError);
  637. throw gone;
  638. }
  639. if (quickCopy != null) {
  640. // The entire object fits into a single byte array window slice,
  641. // and we have it pinned. Write this out without copying.
  642. //
  643. out.writeHeader(src, inflatedLength);
  644. quickCopy.write(out, dataOffset, (int) dataLength);
  645. } else if (dataLength <= buf.length) {
  646. // Tiny optimization: Lots of objects are very small deltas or
  647. // deflated commits that are likely to fit in the copy buffer.
  648. //
  649. if (!validate) {
  650. long pos = dataOffset;
  651. long cnt = dataLength;
  652. while (cnt > 0) {
  653. final int n = (int) Math.min(cnt, buf.length);
  654. readFully(pos, buf, 0, n, ctx);
  655. pos += n;
  656. cnt -= n;
  657. }
  658. }
  659. out.writeHeader(src, inflatedLength);
  660. out.write(buf, 0, (int) dataLength);
  661. } else {
  662. // Now we are committed to sending the object. As we spool it out,
  663. // check its CRC32 code to make sure there wasn't corruption between
  664. // the verification we did above, and us actually outputting it.
  665. //
  666. out.writeHeader(src, inflatedLength);
  667. long pos = dataOffset;
  668. long cnt = dataLength;
  669. while (cnt > 0) {
  670. final int n = (int) Math.min(cnt, buf.length);
  671. readFully(pos, buf, 0, n, ctx);
  672. if (validate) {
  673. assert(crc2 != null);
  674. crc2.update(buf, 0, n);
  675. }
  676. out.write(buf, 0, n);
  677. pos += n;
  678. cnt -= n;
  679. }
  680. if (validate) {
  681. assert(crc2 != null);
  682. if (crc2.getValue() != expectedCRC) {
  683. throw new CorruptObjectException(MessageFormat.format(
  684. JGitText.get().objectAtHasBadZlibStream,
  685. Long.valueOf(src.offset), getPackName()));
  686. }
  687. }
  688. }
  689. }
  690. boolean invalid() {
  691. return invalid;
  692. }
  693. void setInvalid() {
  694. invalid = true;
  695. }
  696. private IOException packfileIsTruncated() {
  697. invalid = true;
  698. IOException exc = new IOException(MessageFormat.format(
  699. JGitText.get().packfileIsTruncated, getPackName()));
  700. invalidatingCause = exc;
  701. return exc;
  702. }
  703. private void readFully(long position, byte[] dstbuf, int dstoff, int cnt,
  704. DfsReader ctx) throws IOException {
  705. if (ctx.copy(this, position, dstbuf, dstoff, cnt) != cnt)
  706. throw new EOFException();
  707. }
  708. long alignToBlock(long pos) {
  709. int size = blockSize;
  710. if (size == 0)
  711. size = cache.getBlockSize();
  712. return (pos / size) * size;
  713. }
  714. DfsBlock getOrLoadBlock(long pos, DfsReader ctx) throws IOException {
  715. return cache.getOrLoad(this, pos, ctx);
  716. }
  717. DfsBlock readOneBlock(long pos, DfsReader ctx)
  718. throws IOException {
  719. if (invalid) {
  720. throw new PackInvalidException(getPackName(), invalidatingCause);
  721. }
  722. ctx.stats.readBlock++;
  723. long start = System.nanoTime();
  724. ReadableChannel rc = ctx.db.openFile(packDesc, PACK);
  725. try {
  726. int size = blockSize(rc);
  727. pos = (pos / size) * size;
  728. // If the size of the file is not yet known, try to discover it.
  729. // Channels may choose to return -1 to indicate they don't
  730. // know the length yet, in this case read up to the size unit
  731. // given by the caller, then recheck the length.
  732. long len = length;
  733. if (len < 0) {
  734. len = rc.size();
  735. if (0 <= len)
  736. length = len;
  737. }
  738. if (0 <= len && len < pos + size)
  739. size = (int) (len - pos);
  740. if (size <= 0)
  741. throw new EOFException(MessageFormat.format(
  742. DfsText.get().shortReadOfBlock, Long.valueOf(pos),
  743. getPackName(), Long.valueOf(0), Long.valueOf(0)));
  744. byte[] buf = new byte[size];
  745. rc.position(pos);
  746. int cnt = read(rc, ByteBuffer.wrap(buf, 0, size));
  747. ctx.stats.readBlockBytes += cnt;
  748. if (cnt != size) {
  749. if (0 <= len) {
  750. throw new EOFException(MessageFormat.format(
  751. DfsText.get().shortReadOfBlock,
  752. Long.valueOf(pos),
  753. getPackName(),
  754. Integer.valueOf(size),
  755. Integer.valueOf(cnt)));
  756. }
  757. // Assume the entire thing was read in a single shot, compact
  758. // the buffer to only the space required.
  759. byte[] n = new byte[cnt];
  760. System.arraycopy(buf, 0, n, 0, n.length);
  761. buf = n;
  762. } else if (len < 0) {
  763. // With no length at the start of the read, the channel should
  764. // have the length available at the end.
  765. length = len = rc.size();
  766. }
  767. return new DfsBlock(key, pos, buf);
  768. } finally {
  769. rc.close();
  770. ctx.stats.readBlockMicros += elapsedMicros(start);
  771. }
  772. }
  773. private int blockSize(ReadableChannel rc) {
  774. // If the block alignment is not yet known, discover it. Prefer the
  775. // larger size from either the cache or the file itself.
  776. int size = blockSize;
  777. if (size == 0) {
  778. size = rc.blockSize();
  779. if (size <= 0)
  780. size = cache.getBlockSize();
  781. else if (size < cache.getBlockSize())
  782. size = (cache.getBlockSize() / size) * size;
  783. blockSize = size;
  784. }
  785. return size;
  786. }
  787. private static int read(ReadableChannel rc, ByteBuffer buf)
  788. throws IOException {
  789. int n;
  790. do {
  791. n = rc.read(buf);
  792. } while (0 < n && buf.hasRemaining());
  793. return buf.position();
  794. }
  795. ObjectLoader load(DfsReader ctx, long pos)
  796. throws IOException {
  797. try {
  798. final byte[] ib = ctx.tempId;
  799. Delta delta = null;
  800. byte[] data = null;
  801. int type = Constants.OBJ_BAD;
  802. boolean cached = false;
  803. SEARCH: for (;;) {
  804. readFully(pos, ib, 0, 20, ctx);
  805. int c = ib[0] & 0xff;
  806. final int typeCode = (c >> 4) & 7;
  807. long sz = c & 15;
  808. int shift = 4;
  809. int p = 1;
  810. while ((c & 0x80) != 0) {
  811. c = ib[p++] & 0xff;
  812. sz += ((long) (c & 0x7f)) << shift;
  813. shift += 7;
  814. }
  815. switch (typeCode) {
  816. case Constants.OBJ_COMMIT:
  817. case Constants.OBJ_TREE:
  818. case Constants.OBJ_BLOB:
  819. case Constants.OBJ_TAG: {
  820. if (delta != null) {
  821. data = decompress(pos + p, (int) sz, ctx);
  822. type = typeCode;
  823. break SEARCH;
  824. }
  825. if (sz < ctx.getStreamFileThreshold()) {
  826. data = decompress(pos + p, (int) sz, ctx);
  827. if (data != null)
  828. return new ObjectLoader.SmallObject(typeCode, data);
  829. }
  830. return new LargePackedWholeObject(typeCode, sz, pos, p, this, ctx.db);
  831. }
  832. case Constants.OBJ_OFS_DELTA: {
  833. c = ib[p++] & 0xff;
  834. long base = c & 127;
  835. while ((c & 128) != 0) {
  836. base += 1;
  837. c = ib[p++] & 0xff;
  838. base <<= 7;
  839. base += (c & 127);
  840. }
  841. base = pos - base;
  842. delta = new Delta(delta, pos, (int) sz, p, base);
  843. if (sz != delta.deltaSize)
  844. break SEARCH;
  845. DeltaBaseCache.Entry e = ctx.getDeltaBaseCache().get(key, base);
  846. if (e != null) {
  847. type = e.type;
  848. data = e.data;
  849. cached = true;
  850. break SEARCH;
  851. }
  852. pos = base;
  853. continue SEARCH;
  854. }
  855. case Constants.OBJ_REF_DELTA: {
  856. readFully(pos + p, ib, 0, 20, ctx);
  857. long base = findDeltaBase(ctx, ObjectId.fromRaw(ib));
  858. delta = new Delta(delta, pos, (int) sz, p + 20, base);
  859. if (sz != delta.deltaSize)
  860. break SEARCH;
  861. DeltaBaseCache.Entry e = ctx.getDeltaBaseCache().get(key, base);
  862. if (e != null) {
  863. type = e.type;
  864. data = e.data;
  865. cached = true;
  866. break SEARCH;
  867. }
  868. pos = base;
  869. continue SEARCH;
  870. }
  871. default:
  872. throw new IOException(MessageFormat.format(
  873. JGitText.get().unknownObjectType, Integer.valueOf(typeCode)));
  874. }
  875. }
  876. // At this point there is at least one delta to apply to data.
  877. // (Whole objects with no deltas to apply return early above.)
  878. if (data == null)
  879. throw new LargeObjectException();
  880. assert(delta != null);
  881. do {
  882. // Cache only the base immediately before desired object.
  883. if (cached)
  884. cached = false;
  885. else if (delta.next == null)
  886. ctx.getDeltaBaseCache().put(key, delta.basePos, type, data);
  887. pos = delta.deltaPos;
  888. byte[] cmds = decompress(pos + delta.hdrLen, delta.deltaSize, ctx);
  889. if (cmds == null) {
  890. data = null; // Discard base in case of OutOfMemoryError
  891. throw new LargeObjectException();
  892. }
  893. final long sz = BinaryDelta.getResultSize(cmds);
  894. if (Integer.MAX_VALUE <= sz)
  895. throw new LargeObjectException.ExceedsByteArrayLimit();
  896. final byte[] result;
  897. try {
  898. result = new byte[(int) sz];
  899. } catch (OutOfMemoryError tooBig) {
  900. data = null; // Discard base in case of OutOfMemoryError
  901. cmds = null;
  902. throw new LargeObjectException.OutOfMemory(tooBig);
  903. }
  904. BinaryDelta.apply(data, cmds, result);
  905. data = result;
  906. delta = delta.next;
  907. } while (delta != null);
  908. return new ObjectLoader.SmallObject(type, data);
  909. } catch (DataFormatException dfe) {
  910. CorruptObjectException coe = new CorruptObjectException(
  911. MessageFormat.format(
  912. JGitText.get().objectAtHasBadZlibStream, Long.valueOf(pos),
  913. getPackName()));
  914. coe.initCause(dfe);
  915. throw coe;
  916. }
  917. }
  918. private long findDeltaBase(DfsReader ctx, ObjectId baseId)
  919. throws IOException, MissingObjectException {
  920. long ofs = idx(ctx).findOffset(baseId);
  921. if (ofs < 0)
  922. throw new MissingObjectException(baseId,
  923. JGitText.get().missingDeltaBase);
  924. return ofs;
  925. }
  926. private static class Delta {
  927. /** Child that applies onto this object. */
  928. final Delta next;
  929. /** Offset of the delta object. */
  930. final long deltaPos;
  931. /** Size of the inflated delta stream. */
  932. final int deltaSize;
  933. /** Total size of the delta's pack entry header (including base). */
  934. final int hdrLen;
  935. /** Offset of the base object this delta applies onto. */
  936. final long basePos;
  937. Delta(Delta next, long ofs, int sz, int hdrLen, long baseOffset) {
  938. this.next = next;
  939. this.deltaPos = ofs;
  940. this.deltaSize = sz;
  941. this.hdrLen = hdrLen;
  942. this.basePos = baseOffset;
  943. }
  944. }
  945. byte[] getDeltaHeader(DfsReader wc, long pos)
  946. throws IOException, DataFormatException {
  947. // The delta stream starts as two variable length integers. If we
  948. // assume they are 64 bits each, we need 16 bytes to encode them,
  949. // plus 2 extra bytes for the variable length overhead. So 18 is
  950. // the longest delta instruction header.
  951. //
  952. final byte[] hdr = new byte[32];
  953. wc.inflate(this, pos, hdr, true /* header only */);
  954. return hdr;
  955. }
  956. int getObjectType(DfsReader ctx, long pos) throws IOException {
  957. final byte[] ib = ctx.tempId;
  958. for (;;) {
  959. readFully(pos, ib, 0, 20, ctx);
  960. int c = ib[0] & 0xff;
  961. final int type = (c >> 4) & 7;
  962. switch (type) {
  963. case Constants.OBJ_COMMIT:
  964. case Constants.OBJ_TREE:
  965. case Constants.OBJ_BLOB:
  966. case Constants.OBJ_TAG:
  967. return type;
  968. case Constants.OBJ_OFS_DELTA: {
  969. int p = 1;
  970. while ((c & 0x80) != 0)
  971. c = ib[p++] & 0xff;
  972. c = ib[p++] & 0xff;
  973. long ofs = c & 127;
  974. while ((c & 128) != 0) {
  975. ofs += 1;
  976. c = ib[p++] & 0xff;
  977. ofs <<= 7;
  978. ofs += (c & 127);
  979. }
  980. pos = pos - ofs;
  981. continue;
  982. }
  983. case Constants.OBJ_REF_DELTA: {
  984. int p = 1;
  985. while ((c & 0x80) != 0)
  986. c = ib[p++] & 0xff;
  987. readFully(pos + p, ib, 0, 20, ctx);
  988. pos = findDeltaBase(ctx, ObjectId.fromRaw(ib));
  989. continue;
  990. }
  991. default:
  992. throw new IOException(MessageFormat.format(
  993. JGitText.get().unknownObjectType, Integer.valueOf(type)));
  994. }
  995. }
  996. }
  997. long getObjectSize(DfsReader ctx, AnyObjectId id) throws IOException {
  998. final long offset = idx(ctx).findOffset(id);
  999. return 0 < offset ? getObjectSize(ctx, offset) : -1;
  1000. }
  1001. long getObjectSize(DfsReader ctx, long pos)
  1002. throws IOException {
  1003. final byte[] ib = ctx.tempId;
  1004. readFully(pos, ib, 0, 20, ctx);
  1005. int c = ib[0] & 0xff;
  1006. final int type = (c >> 4) & 7;
  1007. long sz = c & 15;
  1008. int shift = 4;
  1009. int p = 1;
  1010. while ((c & 0x80) != 0) {
  1011. c = ib[p++] & 0xff;
  1012. sz += ((long) (c & 0x7f)) << shift;
  1013. shift += 7;
  1014. }
  1015. long deltaAt;
  1016. switch (type) {
  1017. case Constants.OBJ_COMMIT:
  1018. case Constants.OBJ_TREE:
  1019. case Constants.OBJ_BLOB:
  1020. case Constants.OBJ_TAG:
  1021. return sz;
  1022. case Constants.OBJ_OFS_DELTA:
  1023. c = ib[p++] & 0xff;
  1024. while ((c & 128) != 0)
  1025. c = ib[p++] & 0xff;
  1026. deltaAt = pos + p;
  1027. break;
  1028. case Constants.OBJ_REF_DELTA:
  1029. deltaAt = pos + p + 20;
  1030. break;
  1031. default:
  1032. throw new IOException(MessageFormat.format(
  1033. JGitText.get().unknownObjectType, Integer.valueOf(type)));
  1034. }
  1035. try {
  1036. return BinaryDelta.getResultSize(getDeltaHeader(ctx, deltaAt));
  1037. } catch (DataFormatException dfe) {
  1038. CorruptObjectException coe = new CorruptObjectException(
  1039. MessageFormat.format(
  1040. JGitText.get().objectAtHasBadZlibStream, Long.valueOf(pos),
  1041. getPackName()));
  1042. coe.initCause(dfe);
  1043. throw coe;
  1044. }
  1045. }
  1046. void representation(DfsObjectRepresentation r, final long pos,
  1047. DfsReader ctx, PackReverseIndex rev)
  1048. throws IOException {
  1049. r.offset = pos;
  1050. final byte[] ib = ctx.tempId;
  1051. readFully(pos, ib, 0, 20, ctx);
  1052. int c = ib[0] & 0xff;
  1053. int p = 1;
  1054. final int typeCode = (c >> 4) & 7;
  1055. while ((c & 0x80) != 0)
  1056. c = ib[p++] & 0xff;
  1057. long len = rev.findNextOffset(pos, length - 20) - pos;
  1058. switch (typeCode) {
  1059. case Constants.OBJ_COMMIT:
  1060. case Constants.OBJ_TREE:
  1061. case Constants.OBJ_BLOB:
  1062. case Constants.OBJ_TAG:
  1063. r.format = StoredObjectRepresentation.PACK_WHOLE;
  1064. r.baseId = null;
  1065. r.length = len - p;
  1066. return;
  1067. case Constants.OBJ_OFS_DELTA: {
  1068. c = ib[p++] & 0xff;
  1069. long ofs = c & 127;
  1070. while ((c & 128) != 0) {
  1071. ofs += 1;
  1072. c = ib[p++] & 0xff;
  1073. ofs <<= 7;
  1074. ofs += (c & 127);
  1075. }
  1076. r.format = StoredObjectRepresentation.PACK_DELTA;
  1077. r.baseId = rev.findObject(pos - ofs);
  1078. r.length = len - p;
  1079. return;
  1080. }
  1081. case Constants.OBJ_REF_DELTA: {
  1082. readFully(pos + p, ib, 0, 20, ctx);
  1083. r.format = StoredObjectRepresentation.PACK_DELTA;
  1084. r.baseId = ObjectId.fromRaw(ib);
  1085. r.length = len - p - 20;
  1086. return;
  1087. }
  1088. default:
  1089. throw new IOException(MessageFormat.format(
  1090. JGitText.get().unknownObjectType, Integer.valueOf(typeCode)));
  1091. }
  1092. }
  1093. boolean isCorrupt(long offset) {
  1094. LongList list = corruptObjects;
  1095. if (list == null)
  1096. return false;
  1097. synchronized (list) {
  1098. return list.contains(offset);
  1099. }
  1100. }
  1101. private void setCorrupt(long offset) {
  1102. LongList list = corruptObjects;
  1103. if (list == null) {
  1104. synchronized (initLock) {
  1105. list = corruptObjects;
  1106. if (list == null) {
  1107. list = new LongList();
  1108. corruptObjects = list;
  1109. }
  1110. }
  1111. }
  1112. synchronized (list) {
  1113. list.add(offset);
  1114. }
  1115. }
  1116. }