You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PackWriter.java 37KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152
  1. /*
  2. * Copyright (C) 2008-2010, Google Inc.
  3. * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.storage.pack;
  45. import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_DELTA;
  46. import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_WHOLE;
  47. import java.io.IOException;
  48. import java.io.OutputStream;
  49. import java.security.MessageDigest;
  50. import java.util.ArrayList;
  51. import java.util.Arrays;
  52. import java.util.Collection;
  53. import java.util.Collections;
  54. import java.util.Comparator;
  55. import java.util.HashSet;
  56. import java.util.Iterator;
  57. import java.util.List;
  58. import java.util.Set;
  59. import java.util.concurrent.CountDownLatch;
  60. import java.util.concurrent.ExecutionException;
  61. import java.util.concurrent.Executor;
  62. import java.util.concurrent.ExecutorService;
  63. import java.util.concurrent.Executors;
  64. import java.util.concurrent.Future;
  65. import java.util.concurrent.TimeUnit;
  66. import java.util.zip.Deflater;
  67. import java.util.zip.DeflaterOutputStream;
  68. import org.eclipse.jgit.JGitText;
  69. import org.eclipse.jgit.errors.CorruptObjectException;
  70. import org.eclipse.jgit.errors.IncorrectObjectTypeException;
  71. import org.eclipse.jgit.errors.LargeObjectException;
  72. import org.eclipse.jgit.errors.MissingObjectException;
  73. import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
  74. import org.eclipse.jgit.lib.AnyObjectId;
  75. import org.eclipse.jgit.lib.AsyncObjectSizeQueue;
  76. import org.eclipse.jgit.lib.Constants;
  77. import org.eclipse.jgit.lib.NullProgressMonitor;
  78. import org.eclipse.jgit.lib.ObjectId;
  79. import org.eclipse.jgit.lib.ObjectIdSubclassMap;
  80. import org.eclipse.jgit.lib.ObjectLoader;
  81. import org.eclipse.jgit.lib.ObjectReader;
  82. import org.eclipse.jgit.lib.ProgressMonitor;
  83. import org.eclipse.jgit.lib.Repository;
  84. import org.eclipse.jgit.lib.ThreadSafeProgressMonitor;
  85. import org.eclipse.jgit.revwalk.AsyncRevObjectQueue;
  86. import org.eclipse.jgit.revwalk.ObjectWalk;
  87. import org.eclipse.jgit.revwalk.RevFlag;
  88. import org.eclipse.jgit.revwalk.RevObject;
  89. import org.eclipse.jgit.revwalk.RevSort;
  90. import org.eclipse.jgit.storage.file.PackIndexWriter;
  91. import org.eclipse.jgit.util.TemporaryBuffer;
  92. /**
  93. * <p>
  94. * The PackWriter class is responsible for generating pack files from a
  95. * specified set of objects in a repository. This implementation produces pack
  96. * files in format version 2.
  97. * </p>
  98. * <p>
  99. * Source of objects may be specified in two ways:
  100. * <ul>
  101. * <li>(usually) by providing sets of interesting and uninteresting objects in
  102. * repository - all interesting objects and their ancestors except uninteresting
  103. * objects and their ancestors will be included in pack, or</li>
  104. * <li>by providing iterator of {@link RevObject} specifying exact list and
  105. * order of objects in pack</li>
  106. * </ul>
  107. * Typical usage consists of creating instance intended for some pack,
  108. * configuring options, preparing the list of objects by calling
  109. * {@link #preparePack(Iterator)} or
  110. * {@link #preparePack(ProgressMonitor, Collection, Collection)}, and finally
  111. * producing the stream with {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}.
  112. * </p>
  113. * <p>
  114. * This class provides a set of configurable options and {@link ProgressMonitor}
  115. * support, as operations may take a long time for big repositories. Deltas
  116. * searching algorithm is <b>NOT IMPLEMENTED</b> yet - this implementation
  117. * relies only on deltas and objects reuse.
  118. * </p>
  119. * <p>
  120. * This class is not thread safe, it is intended to be used in one thread, with
  121. * one instance per created pack. Subsequent calls to writePack result in
  122. * undefined behavior.
  123. * </p>
  124. */
  125. public class PackWriter {
  /** Pack format version passed to the pack file header. */
  private static final int PACK_VERSION_GENERATED = 2;

  /**
   * Objects queued for packing, indexed by object type code. Slot 0 holds a
   * shared empty list; the commit, tree, blob and tag slots each own a list.
   */
  @SuppressWarnings("unchecked")
  private final List<ObjectToPack>[] objectsLists = new List[Constants.OBJ_TAG + 1];

  {
    objectsLists[Constants.OBJ_TAG] = new ArrayList<ObjectToPack>();
    objectsLists[Constants.OBJ_BLOB] = new ArrayList<ObjectToPack>();
    objectsLists[Constants.OBJ_TREE] = new ArrayList<ObjectToPack>();
    objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
    objectsLists[0] = Collections.<ObjectToPack> emptyList();
  }

  /** Lookup table of the objects queued for packing, keyed by object id. */
  private final ObjectIdSubclassMap<ObjectToPack> objectsMap =
      new ObjectIdSubclassMap<ObjectToPack>();

  /** Edge objects for thin packs. */
  private final ObjectIdSubclassMap<ObjectToPack> edgeObjects =
      new ObjectIdSubclassMap<ObjectToPack>();

  /** Deflater owned by this writer; ended and cleared by {@link #release()}. */
  private Deflater myDeflater;

  /** Reader used to access the objects being packed. */
  private final ObjectReader reader;

  /** {@link #reader} recast to the reuse interface, if it supports it. */
  private final ObjectReuseAsIs reuseSupport;

  /** Configuration supplied at construction time. */
  private final PackConfig config;

  /** Cache of all queued objects sorted by name; built on first use. */
  private List<ObjectToPack> sortedByName;

  /** Pack checksum handed to the index writer. */
  private byte[] packcsum;

  /** Whether delta bases may be written as offsets. */
  private boolean deltaBaseAsOffset;

  /** Whether existing deltas should be reused. */
  private boolean reuseDeltas;

  /** Whether a thin pack is being produced. */
  private boolean thin;

  /** Whether missing uninteresting objects are tolerated; true by default. */
  private boolean ignoreMissingUninteresting = true;
  /**
   * Create a writer that packs objects stored in the given repository.
   * <p>
   * A new object reader is obtained from the repository. The objects to pack
   * are supplied later through {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param repo
   *            repository where objects are stored.
   */
  public PackWriter(final Repository repo) {
    this(repo, repo.newObjectReader());
  }
  /**
   * Create a writer that loads objects through the given reader, using a
   * freshly constructed default {@link PackConfig}.
   * <p>
   * The objects to pack are supplied later through
   * {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param reader
   *            reader to read from the repository with.
   */
  public PackWriter(final ObjectReader reader) {
    this(new PackConfig(), reader);
  }
  /**
   * Create a writer for the given repository that reads through the supplied
   * reader. The pack configuration is built from the repository.
   * <p>
   * The objects to pack are supplied later through
   * {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param repo
   *            repository where objects are stored.
   * @param reader
   *            reader to read from the repository with.
   */
  public PackWriter(final Repository repo, final ObjectReader reader) {
    this(new PackConfig(repo), reader);
  }
  /**
   * Create a writer driven by an explicit configuration.
   * <p>
   * The reader is additionally kept as an {@link ObjectReuseAsIs} when it
   * implements that interface. The initial delta-base-as-offset and
   * reuse-deltas settings are copied from the configuration. The objects to
   * pack are supplied later through {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param config
   *            configuration for the pack writer.
   * @param reader
   *            reader to read from the repository with.
   */
  public PackWriter(final PackConfig config, final ObjectReader reader) {
    this.config = config;
    this.reader = reader;
    this.reuseSupport =
        reader instanceof ObjectReuseAsIs ? (ObjectReuseAsIs) reader : null;
    this.deltaBaseAsOffset = config.isDeltaBaseAsOffset();
    this.reuseDeltas = config.isReuseDeltas();
  }
  209. /**
  210. * Check whether writer can store delta base as an offset (new style
  211. * reducing pack size) or should store it as an object id (legacy style,
  212. * compatible with old readers).
  213. *
  214. * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET}
  215. *
  216. * @return true if delta base is stored as an offset; false if it is stored
  217. * as an object id.
  218. */
  219. public boolean isDeltaBaseAsOffset() {
  220. return deltaBaseAsOffset;
  221. }
  222. /**
  223. * Set writer delta base format. Delta base can be written as an offset in a
  224. * pack file (new approach reducing file size) or as an object id (legacy
  225. * approach, compatible with old readers).
  226. *
  227. * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET}
  228. *
  229. * @param deltaBaseAsOffset
  230. * boolean indicating whether delta base can be stored as an
  231. * offset.
  232. */
  233. public void setDeltaBaseAsOffset(boolean deltaBaseAsOffset) {
  234. this.deltaBaseAsOffset = deltaBaseAsOffset;
  235. }
  236. /** @return true if this writer is producing a thin pack. */
  237. public boolean isThin() {
  238. return thin;
  239. }
  240. /**
  241. * @param packthin
  242. * a boolean indicating whether writer may pack objects with
  243. * delta base object not within set of objects to pack, but
  244. * belonging to party repository (uninteresting/boundary) as
  245. * determined by set; this kind of pack is used only for
  246. * transport; true - to produce thin pack, false - otherwise.
  247. */
  248. public void setThin(final boolean packthin) {
  249. thin = packthin;
  250. }
  251. /**
  252. * @return true to ignore objects that are uninteresting and also not found
  253. * on local disk; false to throw a {@link MissingObjectException}
  254. * out of {@link #preparePack(ProgressMonitor, Collection, Collection)} if an
  255. * uninteresting object is not in the source repository. By default,
  256. * true, permitting gracefully ignoring of uninteresting objects.
  257. */
  258. public boolean isIgnoreMissingUninteresting() {
  259. return ignoreMissingUninteresting;
  260. }
  261. /**
  262. * @param ignore
  263. * true if writer should ignore non existing uninteresting
  264. * objects during construction set of objects to pack; false
  265. * otherwise - non existing uninteresting objects may cause
  266. * {@link MissingObjectException}
  267. */
  268. public void setIgnoreMissingUninteresting(final boolean ignore) {
  269. ignoreMissingUninteresting = ignore;
  270. }
  271. /**
  272. * Returns objects number in a pack file that was created by this writer.
  273. *
  274. * @return number of objects in pack.
  275. */
  276. public int getObjectsNumber() {
  277. return objectsMap.size();
  278. }
  279. /**
  280. * Prepare the list of objects to be written to the pack stream.
  281. * <p>
  282. * Iterator <b>exactly</b> determines which objects are included in a pack
  283. * and order they appear in pack (except that objects order by type is not
  284. * needed at input). This order should conform general rules of ordering
  285. * objects in git - by recency and path (type and delta-base first is
  286. * internally secured) and responsibility for guaranteeing this order is on
  287. * a caller side. Iterator must return each id of object to write exactly
  288. * once.
  289. * </p>
  290. * <p>
  291. * When iterator returns object that has {@link RevFlag#UNINTERESTING} flag,
  292. * this object won't be included in an output pack. Instead, it is recorded
  293. * as edge-object (known to remote repository) for thin-pack. In such a case
  294. * writer may pack objects with delta base object not within set of objects
  295. * to pack, but belonging to party repository - those marked with
  296. * {@link RevFlag#UNINTERESTING} flag. This type of pack is used only for
  297. * transport.
  298. * </p>
  299. *
  300. * @param objectsSource
  301. * iterator of object to store in a pack; order of objects within
  302. * each type is important, ordering by type is not needed;
  303. * allowed types for objects are {@link Constants#OBJ_COMMIT},
  304. * {@link Constants#OBJ_TREE}, {@link Constants#OBJ_BLOB} and
  305. * {@link Constants#OBJ_TAG}; objects returned by iterator may
  306. * be later reused by caller as object id and type are internally
  307. * copied in each iteration; if object returned by iterator has
  308. * {@link RevFlag#UNINTERESTING} flag set, it won't be included
  309. * in a pack, but is considered as edge-object for thin-pack.
  310. * @throws IOException
  311. * when some I/O problem occur during reading objects.
  312. */
  313. public void preparePack(final Iterator<RevObject> objectsSource)
  314. throws IOException {
  315. while (objectsSource.hasNext()) {
  316. addObject(objectsSource.next());
  317. }
  318. }
  319. /**
  320. * Prepare the list of objects to be written to the pack stream.
  321. * <p>
  322. * Basing on these 2 sets, another set of objects to put in a pack file is
  323. * created: this set consists of all objects reachable (ancestors) from
  324. * interesting objects, except uninteresting objects and their ancestors.
  325. * This method uses class {@link ObjectWalk} extensively to find out that
  326. * appropriate set of output objects and their optimal order in output pack.
  327. * Order is consistent with general git in-pack rules: sort by object type,
  328. * recency, path and delta-base first.
  329. * </p>
  330. *
  331. * @param countingMonitor
  332. * progress during object enumeration.
  333. * @param interestingObjects
  334. * collection of objects to be marked as interesting (start
  335. * points of graph traversal).
  336. * @param uninterestingObjects
  337. * collection of objects to be marked as uninteresting (end
  338. * points of graph traversal).
  339. * @throws IOException
  340. * when some I/O problem occur during reading objects.
  341. */
  342. public void preparePack(ProgressMonitor countingMonitor,
  343. final Collection<? extends ObjectId> interestingObjects,
  344. final Collection<? extends ObjectId> uninterestingObjects)
  345. throws IOException {
  346. if (countingMonitor == null)
  347. countingMonitor = NullProgressMonitor.INSTANCE;
  348. ObjectWalk walker = setUpWalker(interestingObjects,
  349. uninterestingObjects);
  350. findObjectsToPack(countingMonitor, walker);
  351. }
  352. /**
  353. * Determine if the pack file will contain the requested object.
  354. *
  355. * @param id
  356. * the object to test the existence of.
  357. * @return true if the object will appear in the output pack file.
  358. */
  359. public boolean willInclude(final AnyObjectId id) {
  360. return get(id) != null;
  361. }
  362. /**
  363. * Lookup the ObjectToPack object for a given ObjectId.
  364. *
  365. * @param id
  366. * the object to find in the pack.
  367. * @return the object we are packing, or null.
  368. */
  369. public ObjectToPack get(AnyObjectId id) {
  370. return objectsMap.get(id);
  371. }
  372. /**
  373. * Computes SHA-1 of lexicographically sorted objects ids written in this
  374. * pack, as used to name a pack file in repository.
  375. *
  376. * @return ObjectId representing SHA-1 name of a pack that was created.
  377. */
  378. public ObjectId computeName() {
  379. final byte[] buf = new byte[Constants.OBJECT_ID_LENGTH];
  380. final MessageDigest md = Constants.newMessageDigest();
  381. for (ObjectToPack otp : sortByName()) {
  382. otp.copyRawTo(buf, 0);
  383. md.update(buf, 0, Constants.OBJECT_ID_LENGTH);
  384. }
  385. return ObjectId.fromRaw(md.digest());
  386. }
  387. /**
  388. * Create an index file to match the pack file just written.
  389. * <p>
  390. * This method can only be invoked after {@link #preparePack(Iterator)} or
  391. * {@link #preparePack(ProgressMonitor, Collection, Collection)} has been
  392. * invoked and completed successfully. Writing a corresponding index is an
  393. * optional feature that not all pack users may require.
  394. *
  395. * @param indexStream
  396. * output for the index data. Caller is responsible for closing
  397. * this stream.
  398. * @throws IOException
  399. * the index data could not be written to the supplied stream.
  400. */
  401. public void writeIndex(final OutputStream indexStream) throws IOException {
  402. final List<ObjectToPack> list = sortByName();
  403. final PackIndexWriter iw;
  404. int indexVersion = config.getIndexVersion();
  405. if (indexVersion <= 0)
  406. iw = PackIndexWriter.createOldestPossible(indexStream, list);
  407. else
  408. iw = PackIndexWriter.createVersion(indexStream, indexVersion);
  409. iw.write(list, packcsum);
  410. }
  411. private List<ObjectToPack> sortByName() {
  412. if (sortedByName == null) {
  413. sortedByName = new ArrayList<ObjectToPack>(objectsMap.size());
  414. for (List<ObjectToPack> list : objectsLists) {
  415. for (ObjectToPack otp : list)
  416. sortedByName.add(otp);
  417. }
  418. Collections.sort(sortedByName);
  419. }
  420. return sortedByName;
  421. }
  422. /**
  423. * Write the prepared pack to the supplied stream.
  424. * <p>
  425. * At first, this method collects and sorts objects to pack, then deltas
  426. * search is performed if set up accordingly, finally pack stream is
  427. * written.
  428. * </p>
  429. * <p>
  430. * All reused objects data checksum (Adler32/CRC32) is computed and
  431. * validated against existing checksum.
  432. * </p>
  433. *
  434. * @param compressMonitor
  435. * progress monitor to report object compression work.
  436. * @param writeMonitor
  437. * progress monitor to report the number of objects written.
  438. * @param packStream
  439. * output stream of pack data. The stream should be buffered by
  440. * the caller. The caller is responsible for closing the stream.
  441. * @throws IOException
  442. * an error occurred reading a local object's data to include in
  443. * the pack, or writing compressed object data to the output
  444. * stream.
  445. */
  446. public void writePack(ProgressMonitor compressMonitor,
  447. ProgressMonitor writeMonitor, OutputStream packStream)
  448. throws IOException {
  449. if (compressMonitor == null)
  450. compressMonitor = NullProgressMonitor.INSTANCE;
  451. if (writeMonitor == null)
  452. writeMonitor = NullProgressMonitor.INSTANCE;
  453. if ((reuseDeltas || config.isReuseObjects()) && reuseSupport != null)
  454. searchForReuse(compressMonitor);
  455. if (config.isDeltaCompress())
  456. searchForDeltas(compressMonitor);
  457. final PackOutputStream out = new PackOutputStream(writeMonitor,
  458. packStream, this);
  459. int objCnt = getObjectsNumber();
  460. writeMonitor.beginTask(JGitText.get().writingObjects, objCnt);
  461. out.writeFileHeader(PACK_VERSION_GENERATED, objCnt);
  462. out.flush();
  463. writeObjects(out);
  464. writeChecksum(out);
  465. reader.release();
  466. writeMonitor.endTask();
  467. }
  468. /** Release all resources used by this writer. */
  469. public void release() {
  470. reader.release();
  471. if (myDeflater != null) {
  472. myDeflater.end();
  473. myDeflater = null;
  474. }
  475. }
  476. private void searchForReuse(ProgressMonitor monitor) throws IOException {
  477. monitor.beginTask(JGitText.get().searchForReuse, getObjectsNumber());
  478. for (List<ObjectToPack> list : objectsLists)
  479. reuseSupport.selectObjectRepresentation(this, monitor, list);
  480. monitor.endTask();
  481. }
  482. private void searchForDeltas(ProgressMonitor monitor)
  483. throws MissingObjectException, IncorrectObjectTypeException,
  484. IOException {
  485. // Commits and annotated tags tend to have too many differences to
  486. // really benefit from delta compression. Consequently just don't
  487. // bother examining those types here.
  488. //
  489. ObjectToPack[] list = new ObjectToPack[
  490. objectsLists[Constants.OBJ_TREE].size()
  491. + objectsLists[Constants.OBJ_BLOB].size()
  492. + edgeObjects.size()];
  493. int cnt = 0;
  494. cnt = findObjectsNeedingDelta(list, cnt, Constants.OBJ_TREE);
  495. cnt = findObjectsNeedingDelta(list, cnt, Constants.OBJ_BLOB);
  496. if (cnt == 0)
  497. return;
  498. // Queue up any edge objects that we might delta against. We won't
  499. // be sending these as we assume the other side has them, but we need
  500. // them in the search phase below.
  501. //
  502. for (ObjectToPack eo : edgeObjects) {
  503. eo.setWeight(0);
  504. list[cnt++] = eo;
  505. }
  506. // Compute the sizes of the objects so we can do a proper sort.
  507. // We let the reader skip missing objects if it chooses. For
  508. // some readers this can be a huge win. We detect missing objects
  509. // by having set the weights above to 0 and allowing the delta
  510. // search code to discover the missing object and skip over it, or
  511. // abort with an exception if we actually had to have it.
  512. //
  513. monitor.beginTask(JGitText.get().compressingObjects, cnt);
  514. AsyncObjectSizeQueue<ObjectToPack> sizeQueue = reader.getObjectSize(
  515. Arrays.<ObjectToPack> asList(list).subList(0, cnt), false);
  516. try {
  517. final long limit = config.getBigFileThreshold();
  518. for (;;) {
  519. monitor.update(1);
  520. try {
  521. if (!sizeQueue.next())
  522. break;
  523. } catch (MissingObjectException notFound) {
  524. if (ignoreMissingUninteresting) {
  525. ObjectToPack otp = sizeQueue.getCurrent();
  526. if (otp != null && otp.isEdge()) {
  527. otp.setDoNotDelta(true);
  528. continue;
  529. }
  530. otp = edgeObjects.get(notFound.getObjectId());
  531. if (otp != null) {
  532. otp.setDoNotDelta(true);
  533. continue;
  534. }
  535. }
  536. throw notFound;
  537. }
  538. ObjectToPack otp = sizeQueue.getCurrent();
  539. if (otp == null) {
  540. otp = objectsMap.get(sizeQueue.getObjectId());
  541. if (otp == null)
  542. otp = edgeObjects.get(sizeQueue.getObjectId());
  543. }
  544. long sz = sizeQueue.getSize();
  545. if (limit <= sz || Integer.MAX_VALUE <= sz)
  546. otp.setDoNotDelta(true); // too big, avoid costly files
  547. else if (sz <= DeltaIndex.BLKSZ)
  548. otp.setDoNotDelta(true); // too small, won't work
  549. else
  550. otp.setWeight((int) sz);
  551. }
  552. } finally {
  553. sizeQueue.release();
  554. }
  555. monitor.endTask();
  556. // Sort the objects by path hash so like files are near each other,
  557. // and then by size descending so that bigger files are first. This
  558. // applies "Linus' Law" which states that newer files tend to be the
  559. // bigger ones, because source files grow and hardly ever shrink.
  560. //
  561. Arrays.sort(list, 0, cnt, new Comparator<ObjectToPack>() {
  562. public int compare(ObjectToPack a, ObjectToPack b) {
  563. int cmp = (a.isDoNotDelta() ? 1 : 0)
  564. - (b.isDoNotDelta() ? 1 : 0);
  565. if (cmp != 0)
  566. return cmp;
  567. cmp = a.getType() - b.getType();
  568. if (cmp != 0)
  569. return cmp;
  570. cmp = (a.getPathHash() >>> 1) - (b.getPathHash() >>> 1);
  571. if (cmp != 0)
  572. return cmp;
  573. cmp = (a.getPathHash() & 1) - (b.getPathHash() & 1);
  574. if (cmp != 0)
  575. return cmp;
  576. return b.getWeight() - a.getWeight();
  577. }
  578. });
  579. // Above we stored the objects we cannot delta onto the end.
  580. // Remove them from the list so we don't waste time on them.
  581. while (0 < cnt && list[cnt - 1].isDoNotDelta())
  582. cnt--;
  583. if (cnt == 0)
  584. return;
  585. monitor.beginTask(JGitText.get().compressingObjects, cnt);
  586. searchForDeltas(monitor, list, cnt);
  587. monitor.endTask();
  588. }
  589. private int findObjectsNeedingDelta(ObjectToPack[] list, int cnt, int type) {
  590. for (ObjectToPack otp : objectsLists[type]) {
  591. if (otp.isDoNotDelta()) // delta is disabled for this path
  592. continue;
  593. if (otp.isDeltaRepresentation()) // already reusing a delta
  594. continue;
  595. otp.setWeight(0);
  596. list[cnt++] = otp;
  597. }
  598. return cnt;
  599. }
  600. private void searchForDeltas(final ProgressMonitor monitor,
  601. final ObjectToPack[] list, final int cnt)
  602. throws MissingObjectException, IncorrectObjectTypeException,
  603. LargeObjectException, IOException {
  604. int threads = config.getThreads();
  605. if (threads == 0)
  606. threads = Runtime.getRuntime().availableProcessors();
  607. if (threads <= 1 || cnt <= 2 * config.getDeltaSearchWindowSize()) {
  608. DeltaCache dc = new DeltaCache(config);
  609. DeltaWindow dw = new DeltaWindow(config, dc, reader);
  610. dw.search(monitor, list, 0, cnt);
  611. return;
  612. }
  613. final DeltaCache dc = new ThreadSafeDeltaCache(config);
  614. final ProgressMonitor pm = new ThreadSafeProgressMonitor(monitor);
  615. // Guess at the size of batch we want. Because we don't really
  616. // have a way for a thread to steal work from another thread if
  617. // it ends early, we over partition slightly so the work units
  618. // are a bit smaller.
  619. //
  620. int estSize = cnt / (threads * 2);
  621. if (estSize < 2 * config.getDeltaSearchWindowSize())
  622. estSize = 2 * config.getDeltaSearchWindowSize();
  623. final List<DeltaTask> myTasks = new ArrayList<DeltaTask>(threads * 2);
  624. for (int i = 0; i < cnt;) {
  625. final int start = i;
  626. final int batchSize;
  627. if (cnt - i < estSize) {
  628. // If we don't have enough to fill the remaining block,
  629. // schedule what is left over as a single block.
  630. //
  631. batchSize = cnt - i;
  632. } else {
  633. // Try to split the block at the end of a path.
  634. //
  635. int end = start + estSize;
  636. while (end < cnt) {
  637. ObjectToPack a = list[end - 1];
  638. ObjectToPack b = list[end];
  639. if (a.getPathHash() == b.getPathHash())
  640. end++;
  641. else
  642. break;
  643. }
  644. batchSize = end - start;
  645. }
  646. i += batchSize;
  647. myTasks.add(new DeltaTask(config, reader, dc, pm, batchSize, start, list));
  648. }
  649. final Executor executor = config.getExecutor();
  650. final List<Throwable> errors = Collections
  651. .synchronizedList(new ArrayList<Throwable>());
  652. if (executor instanceof ExecutorService) {
  653. // Caller supplied us a service, use it directly.
  654. //
  655. runTasks((ExecutorService) executor, myTasks, errors);
  656. } else if (executor == null) {
  657. // Caller didn't give us a way to run the tasks, spawn up a
  658. // temporary thread pool and make sure it tears down cleanly.
  659. //
  660. ExecutorService pool = Executors.newFixedThreadPool(threads);
  661. try {
  662. runTasks(pool, myTasks, errors);
  663. } finally {
  664. pool.shutdown();
  665. for (;;) {
  666. try {
  667. if (pool.awaitTermination(60, TimeUnit.SECONDS))
  668. break;
  669. } catch (InterruptedException e) {
  670. throw new IOException(
  671. JGitText.get().packingCancelledDuringObjectsWriting);
  672. }
  673. }
  674. }
  675. } else {
  676. // The caller gave us an executor, but it might not do
  677. // asynchronous execution. Wrap everything and hope it
  678. // can schedule these for us.
  679. //
  680. final CountDownLatch done = new CountDownLatch(myTasks.size());
  681. for (final DeltaTask task : myTasks) {
  682. executor.execute(new Runnable() {
  683. public void run() {
  684. try {
  685. task.call();
  686. } catch (Throwable failure) {
  687. errors.add(failure);
  688. } finally {
  689. done.countDown();
  690. }
  691. }
  692. });
  693. }
  694. try {
  695. done.await();
  696. } catch (InterruptedException ie) {
  697. // We can't abort the other tasks as we have no handle.
  698. // Cross our fingers and just break out anyway.
  699. //
  700. throw new IOException(
  701. JGitText.get().packingCancelledDuringObjectsWriting);
  702. }
  703. }
  704. // If any task threw an error, try to report it back as
  705. // though we weren't using a threaded search algorithm.
  706. //
  707. if (!errors.isEmpty()) {
  708. Throwable err = errors.get(0);
  709. if (err instanceof Error)
  710. throw (Error) err;
  711. if (err instanceof RuntimeException)
  712. throw (RuntimeException) err;
  713. if (err instanceof IOException)
  714. throw (IOException) err;
  715. IOException fail = new IOException(err.getMessage());
  716. fail.initCause(err);
  717. throw fail;
  718. }
  719. }
  720. private void runTasks(ExecutorService pool, List<DeltaTask> tasks,
  721. List<Throwable> errors) throws IOException {
  722. List<Future<?>> futures = new ArrayList<Future<?>>(tasks.size());
  723. for (DeltaTask task : tasks)
  724. futures.add(pool.submit(task));
  725. try {
  726. for (Future<?> f : futures) {
  727. try {
  728. f.get();
  729. } catch (ExecutionException failed) {
  730. errors.add(failed.getCause());
  731. }
  732. }
  733. } catch (InterruptedException ie) {
  734. for (Future<?> f : futures)
  735. f.cancel(true);
  736. throw new IOException(
  737. JGitText.get().packingCancelledDuringObjectsWriting);
  738. }
  739. }
  740. private void writeObjects(PackOutputStream out) throws IOException {
  741. if (reuseSupport != null) {
  742. for (List<ObjectToPack> list : objectsLists)
  743. reuseSupport.writeObjects(out, list);
  744. } else {
  745. for (List<ObjectToPack> list : objectsLists) {
  746. for (ObjectToPack otp : list)
  747. out.writeObject(otp);
  748. }
  749. }
  750. }
  751. void writeObject(PackOutputStream out, ObjectToPack otp) throws IOException {
  752. if (otp.isWritten())
  753. return; // We shouldn't be here.
  754. otp.markWantWrite();
  755. if (otp.isDeltaRepresentation())
  756. writeBaseFirst(out, otp);
  757. out.resetCRC32();
  758. otp.setOffset(out.length());
  759. while (otp.isReuseAsIs()) {
  760. try {
  761. reuseSupport.copyObjectAsIs(out, otp);
  762. out.endObject();
  763. otp.setCRC(out.getCRC32());
  764. return;
  765. } catch (StoredObjectRepresentationNotAvailableException gone) {
  766. if (otp.getOffset() == out.length()) {
  767. redoSearchForReuse(otp);
  768. continue;
  769. } else {
  770. // Object writing already started, we cannot recover.
  771. //
  772. CorruptObjectException coe;
  773. coe = new CorruptObjectException(otp, "");
  774. coe.initCause(gone);
  775. throw coe;
  776. }
  777. }
  778. }
  779. // If we reached here, reuse wasn't possible.
  780. //
  781. if (otp.isDeltaRepresentation())
  782. writeDeltaObjectDeflate(out, otp);
  783. else
  784. writeWholeObjectDeflate(out, otp);
  785. out.endObject();
  786. otp.setCRC(out.getCRC32());
  787. }
  788. private void writeBaseFirst(PackOutputStream out, final ObjectToPack otp)
  789. throws IOException {
  790. ObjectToPack baseInPack = otp.getDeltaBase();
  791. if (baseInPack != null) {
  792. if (!baseInPack.isWritten()) {
  793. if (baseInPack.wantWrite()) {
  794. // There is a cycle. Our caller is trying to write the
  795. // object we want as a base, and called us. Turn off
  796. // delta reuse so we can find another form.
  797. //
  798. reuseDeltas = false;
  799. redoSearchForReuse(otp);
  800. reuseDeltas = true;
  801. } else {
  802. writeObject(out, baseInPack);
  803. }
  804. }
  805. } else if (!thin) {
  806. // This should never occur, the base isn't in the pack and
  807. // the pack isn't allowed to reference base outside objects.
  808. // Write the object as a whole form, even if that is slow.
  809. //
  810. otp.clearDeltaBase();
  811. otp.clearReuseAsIs();
  812. }
  813. }
  814. private void redoSearchForReuse(final ObjectToPack otp) throws IOException,
  815. MissingObjectException {
  816. otp.clearDeltaBase();
  817. otp.clearReuseAsIs();
  818. reuseSupport.selectObjectRepresentation(this,
  819. NullProgressMonitor.INSTANCE, Collections.singleton(otp));
  820. }
  821. private void writeWholeObjectDeflate(PackOutputStream out,
  822. final ObjectToPack otp) throws IOException {
  823. final Deflater deflater = deflater();
  824. final ObjectLoader ldr = reader.open(otp, otp.getType());
  825. out.writeHeader(otp, ldr.getSize());
  826. deflater.reset();
  827. DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater);
  828. ldr.copyTo(dst);
  829. dst.finish();
  830. }
  831. private void writeDeltaObjectDeflate(PackOutputStream out,
  832. final ObjectToPack otp) throws IOException {
  833. DeltaCache.Ref ref = otp.popCachedDelta();
  834. if (ref != null) {
  835. byte[] zbuf = ref.get();
  836. if (zbuf != null) {
  837. out.writeHeader(otp, otp.getCachedSize());
  838. out.write(zbuf);
  839. return;
  840. }
  841. }
  842. TemporaryBuffer.Heap delta = delta(otp);
  843. out.writeHeader(otp, delta.length());
  844. Deflater deflater = deflater();
  845. deflater.reset();
  846. DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater);
  847. delta.writeTo(dst, null);
  848. dst.finish();
  849. }
  850. private TemporaryBuffer.Heap delta(final ObjectToPack otp)
  851. throws IOException {
  852. DeltaIndex index = new DeltaIndex(buffer(otp.getDeltaBaseId()));
  853. byte[] res = buffer(otp);
  854. // We never would have proposed this pair if the delta would be
  855. // larger than the unpacked version of the object. So using it
  856. // as our buffer limit is valid: we will never reach it.
  857. //
  858. TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(res.length);
  859. index.encode(delta, res);
  860. return delta;
  861. }
  862. private byte[] buffer(AnyObjectId objId) throws IOException {
  863. return buffer(config, reader, objId);
  864. }
  865. static byte[] buffer(PackConfig config, ObjectReader or, AnyObjectId objId)
  866. throws IOException {
  867. // PackWriter should have already pruned objects that
  868. // are above the big file threshold, so our chances of
  869. // the object being below it are very good. We really
  870. // shouldn't be here, unless the implementation is odd.
  871. return or.open(objId).getCachedBytes(config.getBigFileThreshold());
  872. }
  873. private Deflater deflater() {
  874. if (myDeflater == null)
  875. myDeflater = new Deflater(config.getCompressionLevel());
  876. return myDeflater;
  877. }
  878. private void writeChecksum(PackOutputStream out) throws IOException {
  879. packcsum = out.getDigest();
  880. out.write(packcsum);
  881. }
  882. private ObjectWalk setUpWalker(
  883. final Collection<? extends ObjectId> interestingObjects,
  884. final Collection<? extends ObjectId> uninterestingObjects)
  885. throws MissingObjectException, IOException,
  886. IncorrectObjectTypeException {
  887. List<ObjectId> all = new ArrayList<ObjectId>(interestingObjects.size());
  888. for (ObjectId id : interestingObjects)
  889. all.add(id.copy());
  890. final Set<ObjectId> not;
  891. if (uninterestingObjects != null && !uninterestingObjects.isEmpty()) {
  892. not = new HashSet<ObjectId>();
  893. for (ObjectId id : uninterestingObjects)
  894. not.add(id.copy());
  895. all.addAll(not);
  896. } else
  897. not = Collections.emptySet();
  898. final ObjectWalk walker = new ObjectWalk(reader);
  899. walker.setRetainBody(false);
  900. walker.sort(RevSort.TOPO);
  901. if (thin && !not.isEmpty())
  902. walker.sort(RevSort.BOUNDARY, true);
  903. AsyncRevObjectQueue q = walker.parseAny(all, true);
  904. try {
  905. for (;;) {
  906. try {
  907. RevObject o = q.next();
  908. if (o == null)
  909. break;
  910. if (not.contains(o.copy()))
  911. walker.markUninteresting(o);
  912. else
  913. walker.markStart(o);
  914. } catch (MissingObjectException e) {
  915. if (ignoreMissingUninteresting
  916. && not.contains(e.getObjectId()))
  917. continue;
  918. throw e;
  919. }
  920. }
  921. } finally {
  922. q.release();
  923. }
  924. return walker;
  925. }
  926. private void findObjectsToPack(final ProgressMonitor countingMonitor,
  927. final ObjectWalk walker) throws MissingObjectException,
  928. IncorrectObjectTypeException, IOException {
  929. countingMonitor.beginTask(JGitText.get().countingObjects,
  930. ProgressMonitor.UNKNOWN);
  931. RevObject o;
  932. while ((o = walker.next()) != null) {
  933. addObject(o, 0);
  934. countingMonitor.update(1);
  935. }
  936. while ((o = walker.nextObject()) != null) {
  937. addObject(o, walker.getPathHashCode());
  938. countingMonitor.update(1);
  939. }
  940. countingMonitor.endTask();
  941. }
  942. /**
  943. * Include one object to the output file.
  944. * <p>
  945. * Objects are written in the order they are added. If the same object is
  946. * added twice, it may be written twice, creating a larger than necessary
  947. * file.
  948. *
  949. * @param object
  950. * the object to add.
  951. * @throws IncorrectObjectTypeException
  952. * the object is an unsupported type.
  953. */
  954. public void addObject(final RevObject object)
  955. throws IncorrectObjectTypeException {
  956. addObject(object, 0);
  957. }
  958. private void addObject(final RevObject object, final int pathHashCode)
  959. throws IncorrectObjectTypeException {
  960. if (object.has(RevFlag.UNINTERESTING)) {
  961. switch (object.getType()) {
  962. case Constants.OBJ_TREE:
  963. case Constants.OBJ_BLOB:
  964. ObjectToPack otp = new ObjectToPack(object);
  965. otp.setPathHash(pathHashCode);
  966. otp.setEdge();
  967. edgeObjects.add(otp);
  968. thin = true;
  969. break;
  970. }
  971. return;
  972. }
  973. final ObjectToPack otp;
  974. if (reuseSupport != null)
  975. otp = reuseSupport.newObjectToPack(object);
  976. else
  977. otp = new ObjectToPack(object);
  978. otp.setPathHash(pathHashCode);
  979. try {
  980. objectsLists[object.getType()].add(otp);
  981. } catch (ArrayIndexOutOfBoundsException x) {
  982. throw new IncorrectObjectTypeException(object,
  983. JGitText.get().incorrectObjectType_COMMITnorTREEnorBLOBnorTAG);
  984. } catch (UnsupportedOperationException x) {
  985. // index pointing to "dummy" empty list
  986. throw new IncorrectObjectTypeException(object,
  987. JGitText.get().incorrectObjectType_COMMITnorTREEnorBLOBnorTAG);
  988. }
  989. objectsMap.add(otp);
  990. }
  991. /**
  992. * Select an object representation for this writer.
  993. * <p>
  994. * An {@link ObjectReader} implementation should invoke this method once for
  995. * each representation available for an object, to allow the writer to find
  996. * the most suitable one for the output.
  997. *
  998. * @param otp
  999. * the object being packed.
  1000. * @param next
  1001. * the next available representation from the repository.
  1002. */
  1003. public void select(ObjectToPack otp, StoredObjectRepresentation next) {
  1004. int nFmt = next.getFormat();
  1005. int nWeight;
  1006. if (otp.isReuseAsIs()) {
  1007. // We've already chosen to reuse a packed form, if next
  1008. // cannot beat that break out early.
  1009. //
  1010. if (PACK_WHOLE < nFmt)
  1011. return; // next isn't packed
  1012. else if (PACK_DELTA < nFmt && otp.isDeltaRepresentation())
  1013. return; // next isn't a delta, but we are
  1014. nWeight = next.getWeight();
  1015. if (otp.getWeight() <= nWeight)
  1016. return; // next would be bigger
  1017. } else
  1018. nWeight = next.getWeight();
  1019. if (nFmt == PACK_DELTA && reuseDeltas) {
  1020. ObjectId baseId = next.getDeltaBase();
  1021. ObjectToPack ptr = objectsMap.get(baseId);
  1022. if (ptr != null) {
  1023. otp.setDeltaBase(ptr);
  1024. otp.setReuseAsIs();
  1025. otp.setWeight(nWeight);
  1026. } else if (thin && edgeObjects.contains(baseId)) {
  1027. otp.setDeltaBase(baseId);
  1028. otp.setReuseAsIs();
  1029. otp.setWeight(nWeight);
  1030. } else {
  1031. otp.clearDeltaBase();
  1032. otp.clearReuseAsIs();
  1033. }
  1034. } else if (nFmt == PACK_WHOLE && config.isReuseObjects()) {
  1035. otp.clearDeltaBase();
  1036. otp.setReuseAsIs();
  1037. otp.setWeight(nWeight);
  1038. } else {
  1039. otp.clearDeltaBase();
  1040. otp.clearReuseAsIs();
  1041. }
  1042. otp.select(next);
  1043. }
  1044. }