You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

DeltaWindow.java 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. /*
  2. * Copyright (C) 2010, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.storage.pack;
  44. import java.io.EOFException;
  45. import java.io.IOException;
  46. import java.io.OutputStream;
  47. import java.util.zip.Deflater;
  48. import org.eclipse.jgit.errors.IncorrectObjectTypeException;
  49. import org.eclipse.jgit.errors.LargeObjectException;
  50. import org.eclipse.jgit.errors.MissingObjectException;
  51. import org.eclipse.jgit.lib.ObjectReader;
  52. import org.eclipse.jgit.lib.ProgressMonitor;
  53. import org.eclipse.jgit.util.TemporaryBuffer;
  54. class DeltaWindow {
  55. private static final int NEXT_RES = 0;
  56. private static final int NEXT_SRC = 1;
  57. private final PackConfig config;
  58. private final DeltaCache deltaCache;
  59. private final ObjectReader reader;
  60. private final DeltaWindowEntry[] window;
  61. /** Maximum number of bytes to admit to the window at once. */
  62. private final long maxMemory;
  63. /** Maximum depth we should create for any delta chain. */
  64. private final int maxDepth;
  65. /** Amount of memory we have loaded right now. */
  66. private long loaded;
  67. // The object we are currently considering needs a lot of state:
  68. /** Position of {@link #res} within {@link #window} array. */
  69. private int resSlot;
  70. /**
  71. * Maximum delta chain depth the current object can have.
  72. * <p>
  73. * This can be smaller than {@link #maxDepth}.
  74. */
  75. private int resMaxDepth;
  76. /** Window entry of the object we are currently considering. */
  77. private DeltaWindowEntry res;
  78. /** If we have a delta for {@link #res}, this is the shortest found yet. */
  79. private TemporaryBuffer.Heap bestDelta;
  80. /** If we have {@link #bestDelta}, the window position it was created by. */
  81. private int bestSlot;
  82. /** Used to compress cached deltas. */
  83. private Deflater deflater;
  84. DeltaWindow(PackConfig pc, DeltaCache dc, ObjectReader or) {
  85. config = pc;
  86. deltaCache = dc;
  87. reader = or;
  88. // C Git increases the window size supplied by the user by 1.
  89. // We don't know why it does this, but if the user asks for
  90. // window=10, it actually processes with window=11. Because
  91. // the window size has the largest direct impact on the final
  92. // pack file size, we match this odd behavior here to give us
  93. // a better chance of producing a similar sized pack as C Git.
  94. //
  95. // We would prefer to directly honor the user's request since
  96. // PackWriter has a minimum of 2 for the window size, but then
  97. // users might complain that JGit is creating a bigger pack file.
  98. //
  99. window = new DeltaWindowEntry[config.getDeltaSearchWindowSize() + 1];
  100. for (int i = 0; i < window.length; i++)
  101. window[i] = new DeltaWindowEntry();
  102. maxMemory = config.getDeltaSearchMemoryLimit();
  103. maxDepth = config.getMaxDeltaDepth();
  104. }
  105. void search(ProgressMonitor monitor, ObjectToPack[] toSearch, int off,
  106. int cnt) throws IOException {
  107. try {
  108. for (int end = off + cnt; off < end; off++) {
  109. res = window[resSlot];
  110. if (0 < maxMemory) {
  111. clear(res);
  112. int tail = next(resSlot);
  113. final long need = estimateSize(toSearch[off]);
  114. while (maxMemory < loaded + need && tail != resSlot) {
  115. clear(window[tail]);
  116. tail = next(tail);
  117. }
  118. }
  119. res.set(toSearch[off]);
  120. if (res.object.isEdge() || res.object.doNotAttemptDelta()) {
  121. // We don't actually want to make a delta for
  122. // them, just need to push them into the window
  123. // so they can be read by other objects.
  124. //
  125. keepInWindow();
  126. } else {
  127. // Search for a delta for the current window slot.
  128. //
  129. monitor.update(1);
  130. search();
  131. }
  132. }
  133. } finally {
  134. if (deflater != null)
  135. deflater.end();
  136. }
  137. }
  138. private static long estimateSize(ObjectToPack ent) {
  139. return DeltaIndex.estimateIndexSize(ent.getWeight());
  140. }
  141. private void clear(DeltaWindowEntry ent) {
  142. if (ent.index != null)
  143. loaded -= ent.index.getIndexSize();
  144. else if (res.buffer != null)
  145. loaded -= ent.buffer.length;
  146. ent.set(null);
  147. }
  148. private void search() throws IOException {
  149. // TODO(spearce) If the object is used as a base for other
  150. // objects in this pack we should limit the depth we create
  151. // for ourselves to be the remainder of our longest dependent
  152. // chain and the configured maximum depth. This can happen
  153. // when the dependents are being reused out a pack, but we
  154. // cannot be because we are near the edge of a thin pack.
  155. //
  156. resMaxDepth = maxDepth;
  157. // Loop through the window backwards, considering every entry.
  158. // This lets us look at the bigger objects that came before.
  159. //
  160. for (int srcSlot = prior(resSlot); srcSlot != resSlot; srcSlot = prior(srcSlot)) {
  161. DeltaWindowEntry src = window[srcSlot];
  162. if (src.empty())
  163. break;
  164. if (delta(src, srcSlot) == NEXT_RES) {
  165. bestDelta = null;
  166. return;
  167. }
  168. }
  169. // We couldn't find a suitable delta for this object, but it may
  170. // still be able to act as a base for another one.
  171. //
  172. if (bestDelta == null) {
  173. keepInWindow();
  174. return;
  175. }
  176. // Select this best matching delta as the base for the object.
  177. //
  178. ObjectToPack srcObj = window[bestSlot].object;
  179. ObjectToPack resObj = res.object;
  180. if (srcObj.isEdge()) {
  181. // The source (the delta base) is an edge object outside of the
  182. // pack. Its part of the common base set that the peer already
  183. // has on hand, so we don't want to send it. We have to store
  184. // an ObjectId and *NOT* an ObjectToPack for the base to ensure
  185. // the base isn't included in the outgoing pack file.
  186. //
  187. resObj.setDeltaBase(srcObj.copy());
  188. } else {
  189. // The base is part of the pack we are sending, so it should be
  190. // a direct pointer to the base.
  191. //
  192. resObj.setDeltaBase(srcObj);
  193. }
  194. resObj.setDeltaDepth(srcObj.getDeltaDepth() + 1);
  195. resObj.clearReuseAsIs();
  196. cacheDelta(srcObj, resObj);
  197. // Discard the cached best result, otherwise it leaks.
  198. //
  199. bestDelta = null;
  200. // If this should be the end of a chain, don't keep
  201. // it in the window. Just move on to the next object.
  202. //
  203. if (resObj.getDeltaDepth() == maxDepth)
  204. return;
  205. shuffleBaseUpInPriority();
  206. keepInWindow();
  207. }
  208. private int delta(final DeltaWindowEntry src, final int srcSlot)
  209. throws IOException {
  210. // Objects must use only the same type as their delta base.
  211. // If we are looking at something where that isn't true we
  212. // have exhausted everything of the correct type and should
  213. // move on to the next thing to examine.
  214. //
  215. if (src.type() != res.type()) {
  216. keepInWindow();
  217. return NEXT_RES;
  218. }
  219. // Only consider a source with a short enough delta chain.
  220. if (src.depth() > resMaxDepth)
  221. return NEXT_SRC;
  222. // Estimate a reasonable upper limit on delta size.
  223. int msz = deltaSizeLimit(res, resMaxDepth, src);
  224. if (msz <= 8)
  225. return NEXT_SRC;
  226. // If we have to insert a lot to make this work, find another.
  227. if (res.size() - src.size() > msz)
  228. return NEXT_SRC;
  229. // If the sizes are radically different, this is a bad pairing.
  230. if (res.size() < src.size() / 16)
  231. return NEXT_SRC;
  232. DeltaIndex srcIndex;
  233. try {
  234. srcIndex = index(src);
  235. } catch (LargeObjectException tooBig) {
  236. // If the source is too big to work on, skip it.
  237. dropFromWindow(srcSlot);
  238. return NEXT_SRC;
  239. } catch (IOException notAvailable) {
  240. if (src.object.isEdge()) {
  241. // This is an edge that is suddenly not available.
  242. dropFromWindow(srcSlot);
  243. return NEXT_SRC;
  244. } else {
  245. throw notAvailable;
  246. }
  247. }
  248. byte[] resBuf;
  249. try {
  250. resBuf = buffer(res);
  251. } catch (LargeObjectException tooBig) {
  252. // If its too big, move on to another item.
  253. return NEXT_RES;
  254. }
  255. // If we already have a delta for the current object, abort
  256. // encoding early if this new pairing produces a larger delta.
  257. if (bestDelta != null && bestDelta.length() < msz)
  258. msz = (int) bestDelta.length();
  259. TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(msz);
  260. try {
  261. if (!srcIndex.encode(delta, resBuf, msz))
  262. return NEXT_SRC;
  263. } catch (IOException deltaTooBig) {
  264. // This only happens when the heap overflows our limit.
  265. return NEXT_SRC;
  266. }
  267. if (isBetterDelta(src, delta)) {
  268. bestDelta = delta;
  269. bestSlot = srcSlot;
  270. }
  271. return NEXT_SRC;
  272. }
  273. private void cacheDelta(ObjectToPack srcObj, ObjectToPack resObj) {
  274. if (Integer.MAX_VALUE < bestDelta.length())
  275. return;
  276. int rawsz = (int) bestDelta.length();
  277. if (deltaCache.canCache(rawsz, srcObj, resObj)) {
  278. try {
  279. byte[] zbuf = new byte[deflateBound(rawsz)];
  280. ZipStream zs = new ZipStream(deflater(), zbuf);
  281. bestDelta.writeTo(zs, null);
  282. bestDelta = null;
  283. int len = zs.finish();
  284. resObj.setCachedDelta(deltaCache.cache(zbuf, len, rawsz));
  285. resObj.setCachedSize(rawsz);
  286. } catch (IOException err) {
  287. deltaCache.credit(rawsz);
  288. } catch (OutOfMemoryError err) {
  289. deltaCache.credit(rawsz);
  290. }
  291. }
  292. }
  293. private static int deflateBound(int insz) {
  294. return insz + ((insz + 7) >> 3) + ((insz + 63) >> 6) + 11;
  295. }
  296. private void shuffleBaseUpInPriority() {
  297. // Shuffle the entire window so that the best match we just used
  298. // is at our current index, and our current object is at the index
  299. // before it. Slide any entries in between to make space.
  300. //
  301. window[resSlot] = window[bestSlot];
  302. DeltaWindowEntry next = res;
  303. int slot = prior(resSlot);
  304. for (; slot != bestSlot; slot = prior(slot)) {
  305. DeltaWindowEntry e = window[slot];
  306. window[slot] = next;
  307. next = e;
  308. }
  309. window[slot] = next;
  310. }
  311. private void keepInWindow() {
  312. resSlot = next(resSlot);
  313. }
  314. private int next(int slot) {
  315. if (++slot == window.length)
  316. return 0;
  317. return slot;
  318. }
  319. private int prior(int slot) {
  320. if (slot == 0)
  321. return window.length - 1;
  322. return slot - 1;
  323. }
  324. private void dropFromWindow(@SuppressWarnings("unused") int srcSlot) {
  325. // We should drop the current source entry from the window,
  326. // it is somehow invalid for us to work with.
  327. }
  328. private boolean isBetterDelta(DeltaWindowEntry src,
  329. TemporaryBuffer.Heap resDelta) {
  330. if (bestDelta == null)
  331. return true;
  332. // If both delta sequences are the same length, use the one
  333. // that has a shorter delta chain since it would be faster
  334. // to access during reads.
  335. //
  336. if (resDelta.length() == bestDelta.length())
  337. return src.depth() < window[bestSlot].depth();
  338. return resDelta.length() < bestDelta.length();
  339. }
  340. private static int deltaSizeLimit(DeltaWindowEntry res, int maxDepth,
  341. DeltaWindowEntry src) {
  342. // Ideally the delta is at least 50% of the original size,
  343. // but we also want to account for delta header overhead in
  344. // the pack file (to point to the delta base) so subtract off
  345. // some of those header bytes from the limit.
  346. //
  347. final int limit = res.size() / 2 - 20;
  348. // Distribute the delta limit over the entire chain length.
  349. // This is weighted such that deeper items in the chain must
  350. // be even smaller than if they were earlier in the chain, as
  351. // they cost significantly more to unpack due to the increased
  352. // number of recursive unpack calls.
  353. //
  354. final int remainingDepth = maxDepth - src.depth();
  355. return (limit * remainingDepth) / maxDepth;
  356. }
  357. private DeltaIndex index(DeltaWindowEntry ent)
  358. throws MissingObjectException, IncorrectObjectTypeException,
  359. IOException, LargeObjectException {
  360. DeltaIndex idx = ent.index;
  361. if (idx == null) {
  362. try {
  363. idx = new DeltaIndex(buffer(ent));
  364. } catch (OutOfMemoryError noMemory) {
  365. LargeObjectException.OutOfMemory e;
  366. e = new LargeObjectException.OutOfMemory(noMemory);
  367. e.setObjectId(ent.object);
  368. throw e;
  369. }
  370. if (0 < maxMemory)
  371. loaded += idx.getIndexSize() - idx.getSourceSize();
  372. ent.index = idx;
  373. }
  374. return idx;
  375. }
  376. private byte[] buffer(DeltaWindowEntry ent) throws MissingObjectException,
  377. IncorrectObjectTypeException, IOException, LargeObjectException {
  378. byte[] buf = ent.buffer;
  379. if (buf == null) {
  380. buf = PackWriter.buffer(config, reader, ent.object);
  381. if (0 < maxMemory)
  382. loaded += buf.length;
  383. ent.buffer = buf;
  384. }
  385. return buf;
  386. }
  387. private Deflater deflater() {
  388. if (deflater == null)
  389. deflater = new Deflater(config.getCompressionLevel());
  390. else
  391. deflater.reset();
  392. return deflater;
  393. }
  394. static final class ZipStream extends OutputStream {
  395. private final Deflater deflater;
  396. private final byte[] zbuf;
  397. private int outPtr;
  398. ZipStream(Deflater deflater, byte[] zbuf) {
  399. this.deflater = deflater;
  400. this.zbuf = zbuf;
  401. }
  402. int finish() throws IOException {
  403. deflater.finish();
  404. for (;;) {
  405. if (outPtr == zbuf.length)
  406. throw new EOFException();
  407. int n = deflater.deflate(zbuf, outPtr, zbuf.length - outPtr);
  408. if (n == 0) {
  409. if (deflater.finished())
  410. return outPtr;
  411. throw new IOException();
  412. }
  413. outPtr += n;
  414. }
  415. }
  416. @Override
  417. public void write(byte[] b, int off, int len) throws IOException {
  418. deflater.setInput(b, off, len);
  419. for (;;) {
  420. if (outPtr == zbuf.length)
  421. throw new EOFException();
  422. int n = deflater.deflate(zbuf, outPtr, zbuf.length - outPtr);
  423. if (n == 0) {
  424. if (deflater.needsInput())
  425. break;
  426. throw new IOException();
  427. }
  428. outPtr += n;
  429. }
  430. }
  431. @Override
  432. public void write(int b) throws IOException {
  433. throw new UnsupportedOperationException();
  434. }
  435. }
  436. }