You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PackWriter.java 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017
  1. /*
  2. * Copyright (C) 2008-2010, Google Inc.
  3. * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.storage.pack;
  45. import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_DELTA;
  46. import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_WHOLE;
  47. import java.io.IOException;
  48. import java.io.OutputStream;
  49. import java.security.MessageDigest;
  50. import java.util.ArrayList;
  51. import java.util.Collection;
  52. import java.util.Collections;
  53. import java.util.Iterator;
  54. import java.util.List;
  55. import java.util.zip.Deflater;
  56. import java.util.zip.DeflaterOutputStream;
  57. import org.eclipse.jgit.JGitText;
  58. import org.eclipse.jgit.errors.CorruptObjectException;
  59. import org.eclipse.jgit.errors.IncorrectObjectTypeException;
  60. import org.eclipse.jgit.errors.MissingObjectException;
  61. import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
  62. import org.eclipse.jgit.lib.AnyObjectId;
  63. import org.eclipse.jgit.lib.Config;
  64. import org.eclipse.jgit.lib.Constants;
  65. import org.eclipse.jgit.lib.NullProgressMonitor;
  66. import org.eclipse.jgit.lib.ObjectId;
  67. import org.eclipse.jgit.lib.ObjectIdSubclassMap;
  68. import org.eclipse.jgit.lib.ObjectLoader;
  69. import org.eclipse.jgit.lib.ObjectReader;
  70. import org.eclipse.jgit.lib.ProgressMonitor;
  71. import org.eclipse.jgit.lib.Repository;
  72. import org.eclipse.jgit.revwalk.ObjectWalk;
  73. import org.eclipse.jgit.revwalk.RevFlag;
  74. import org.eclipse.jgit.revwalk.RevObject;
  75. import org.eclipse.jgit.revwalk.RevSort;
  76. import org.eclipse.jgit.storage.file.PackIndexWriter;
  77. /**
  78. * <p>
  * PackWriter class is responsible for generating pack files from a specified
  * set of objects from a repository. This implementation produces pack files
  * in format version 2.
  82. * </p>
  83. * <p>
  84. * Source of objects may be specified in two ways:
  85. * <ul>
  86. * <li>(usually) by providing sets of interesting and uninteresting objects in
  87. * repository - all interesting objects and their ancestors except uninteresting
  88. * objects and their ancestors will be included in pack, or</li>
  89. * <li>by providing iterator of {@link RevObject} specifying exact list and
  90. * order of objects in pack</li>
  91. * </ul>
  92. * Typical usage consists of creating instance intended for some pack,
  93. * configuring options, preparing the list of objects by calling
  94. * {@link #preparePack(Iterator)} or
  95. * {@link #preparePack(ProgressMonitor, Collection, Collection)}, and finally
  96. * producing the stream with {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}.
  97. * </p>
  98. * <p>
  * This class provides a set of configurable options and {@link ProgressMonitor}
  * support, as operations may take a long time for big repositories. The delta
  * search algorithm is <b>NOT IMPLEMENTED</b> yet - this implementation
  * relies only on reuse of existing deltas and objects.
  103. * </p>
  104. * <p>
  105. * This class is not thread safe, it is intended to be used in one thread, with
  106. * one instance per created pack. Subsequent calls to writePack result in
  107. * undefined behavior.
  108. * </p>
  109. */
  110. public class PackWriter {
  /**
   * Title of {@link ProgressMonitor} task used during counting objects to
   * pack.
   *
   * @see #preparePack(ProgressMonitor, Collection, Collection)
   */
  public static final String COUNTING_OBJECTS_PROGRESS = JGitText.get().countingObjects;
  /**
   * Title of {@link ProgressMonitor} task used during compression.
   *
   * @see #writePack(ProgressMonitor, ProgressMonitor, OutputStream)
   */
  public static final String COMPRESSING_OBJECTS_PROGRESS = JGitText.get().compressingObjects;
  /**
   * Title of {@link ProgressMonitor} task used during writing out pack
   * (objects)
   *
   * @see #writePack(ProgressMonitor, ProgressMonitor, OutputStream)
   */
  public static final String WRITING_OBJECTS_PROGRESS = JGitText.get().writingObjects;
  /**
   * Default value of deltas reuse option.
   *
   * @see #setReuseDeltas(boolean)
   */
  public static final boolean DEFAULT_REUSE_DELTAS = true;
  /**
   * Default value of objects reuse option.
   *
   * @see #setReuseObjects(boolean)
   */
  public static final boolean DEFAULT_REUSE_OBJECTS = true;
  /**
   * Default value of delta base as offset option.
   *
   * @see #setDeltaBaseAsOffset(boolean)
   */
  public static final boolean DEFAULT_DELTA_BASE_AS_OFFSET = false;
  /**
   * Default value of maximum delta chain depth.
   *
   * @see #setMaxDeltaDepth(int)
   */
  public static final int DEFAULT_MAX_DELTA_DEPTH = 50;
  /**
   * Default window size during packing.
   *
   * @see #setDeltaSearchWindowSize(int)
   */
  public static final int DEFAULT_DELTA_SEARCH_WINDOW_SIZE = 10;
  /**
   * Default value of the big file threshold, in bytes (50 MiB).
   *
   * @see #setBigFileThreshold(long)
   */
  static final long DEFAULT_BIG_FILE_THRESHOLD = 50 * 1024 * 1024;
  /**
   * Pack format version written into the file header by
   * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}.
   */
  private static final int PACK_VERSION_GENERATED = 2;
  /**
   * Objects to pack, bucketed by object type: the array index is the type
   * constant ({@link Constants#OBJ_COMMIT} .. {@link Constants#OBJ_TAG}).
   * Slot 0 is not a real type and holds an immutable empty list so that
   * iteration over the whole array needs no null check.
   */
  @SuppressWarnings("unchecked")
  private final List<ObjectToPack> objectsLists[] = new List[Constants.OBJ_TAG + 1];
  {
      objectsLists[0] = Collections.<ObjectToPack> emptyList();
      objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
      objectsLists[Constants.OBJ_TREE] = new ArrayList<ObjectToPack>();
      objectsLists[Constants.OBJ_BLOB] = new ArrayList<ObjectToPack>();
      objectsLists[Constants.OBJ_TAG] = new ArrayList<ObjectToPack>();
  }
  /**
   * Lookup of objects by id; consulted by {@link #willInclude(AnyObjectId)}
   * and its size is reported by {@link #getObjectsNumber()}.
   */
  private final ObjectIdSubclassMap<ObjectToPack> objectsMap = new ObjectIdSubclassMap<ObjectToPack>();
  // edge objects for thin packs
  private final ObjectIdSubclassMap<ObjectToPack> edgeObjects = new ObjectIdSubclassMap<ObjectToPack>();
  /** Compression level, initialized from the pack configuration. */
  private int compressionLevel;
  /** Deflater owned by this writer; ended and cleared by {@link #release()}. */
  private Deflater myDeflater;
  /** Reader used to access the objects being packed. */
  private final ObjectReader reader;
  /** {@link #reader} recast to the reuse interface, if it supports it. */
  private final ObjectReuseAsIs reuseSupport;
  /** Lazily built, name-sorted view of all objects; see {@code sortByName()}. */
  private List<ObjectToPack> sortedByName;
  /** Pack checksum handed to the index writer by {@link #writeIndex(OutputStream)}. */
  private byte packcsum[];
  /** @see #setReuseDeltas(boolean) */
  private boolean reuseDeltas = DEFAULT_REUSE_DELTAS;
  /** @see #setReuseObjects(boolean) */
  private boolean reuseObjects = DEFAULT_REUSE_OBJECTS;
  /** @see #setDeltaBaseAsOffset(boolean) */
  private boolean deltaBaseAsOffset = DEFAULT_DELTA_BASE_AS_OFFSET;
  /** @see #setDeltaCompress(boolean) */
  private boolean deltaCompress = true;
  /** @see #setMaxDeltaDepth(int) */
  private int maxDeltaDepth = DEFAULT_MAX_DELTA_DEPTH;
  /** @see #setDeltaSearchWindowSize(int) */
  private int deltaSearchWindowSize = DEFAULT_DELTA_SEARCH_WINDOW_SIZE;
  /**
   * Pack index format version; values of 0 or less select the oldest
   * possible format in {@link #writeIndex(OutputStream)}.
   */
  private int indexVersion;
  /** @see #setBigFileThreshold(long) */
  private long bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
  /** @see #setThin(boolean) */
  private boolean thin;
  /** @see #setIgnoreMissingUninteresting(boolean) */
  private boolean ignoreMissingUninteresting = true;
  /**
   * Construct a writer that packs objects out of the given repository.
   * <p>
   * A fresh reader is obtained from the repository through
   * {@code repo.newObjectReader()}. The objects to pack are chosen later,
   * by {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param repo
   *            repository holding the objects; must not be null.
   */
  public PackWriter(Repository repo) {
      this(repo, repo.newObjectReader());
  }
  /**
   * Construct a writer that loads objects through the supplied reader.
   * <p>
   * No repository is associated with the writer, so the pack settings are
   * read from an empty {@link Config}. The objects to pack are chosen
   * later, by {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param reader
   *            reader used to access the objects.
   */
  public PackWriter(ObjectReader reader) {
      this(null, reader);
  }
  /**
   * Construct a writer for a repository, reading through a given reader.
   * <p>
   * The delta window, delta depth, compression level, index version and
   * big file threshold are taken from the repository's pack configuration
   * (or from an empty configuration when {@code repo} is null). The
   * objects to pack are chosen later, by {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param repo
   *            repository where objects are stored; may be null.
   * @param reader
   *            reader used to access the objects.
   */
  public PackWriter(Repository repo, ObjectReader reader) {
      this.reader = reader;
      // Reuse of stored representations is only possible when the reader
      // also implements the reuse interface.
      this.reuseSupport = reader instanceof ObjectReuseAsIs
              ? (ObjectReuseAsIs) reader
              : null;

      PackConfig cfg = configOf(repo).get(PackConfig.KEY);
      this.deltaSearchWindowSize = cfg.deltaWindow;
      this.maxDeltaDepth = cfg.deltaDepth;
      this.compressionLevel = cfg.compression;
      this.indexVersion = cfg.indexVersion;
      this.bigFileThreshold = cfg.bigFileThreshold;
  }
  240. private static Config configOf(final Repository repo) {
  241. if (repo == null)
  242. return new Config();
  243. return repo.getConfig();
  244. }
  245. /**
  246. * Check whether object is configured to reuse deltas existing in
  247. * repository.
  248. * <p>
  249. * Default setting: {@value #DEFAULT_REUSE_DELTAS}
  250. * </p>
  251. *
  252. * @return true if object is configured to reuse deltas; false otherwise.
  253. */
  254. public boolean isReuseDeltas() {
  255. return reuseDeltas;
  256. }
  257. /**
  258. * Set reuse deltas configuration option for this writer. When enabled,
  259. * writer will search for delta representation of object in repository and
  260. * use it if possible. Normally, only deltas with base to another object
  261. * existing in set of objects to pack will be used. Exception is however
  262. * thin-pack (see
  263. * {@link #preparePack(ProgressMonitor, Collection, Collection)} and
  264. * {@link #preparePack(Iterator)}) where base object must exist on other
  265. * side machine.
  266. * <p>
  267. * When raw delta data is directly copied from a pack file, checksum is
  268. * computed to verify data.
  269. * </p>
  270. * <p>
  271. * Default setting: {@value #DEFAULT_REUSE_DELTAS}
  272. * </p>
  273. *
  274. * @param reuseDeltas
  275. * boolean indicating whether or not try to reuse deltas.
  276. */
  277. public void setReuseDeltas(boolean reuseDeltas) {
  278. this.reuseDeltas = reuseDeltas;
  279. }
  280. /**
  281. * Checks whether object is configured to reuse existing objects
  282. * representation in repository.
  283. * <p>
  284. * Default setting: {@value #DEFAULT_REUSE_OBJECTS}
  285. * </p>
  286. *
  287. * @return true if writer is configured to reuse objects representation from
  288. * pack; false otherwise.
  289. */
  290. public boolean isReuseObjects() {
  291. return reuseObjects;
  292. }
  293. /**
  294. * Set reuse objects configuration option for this writer. If enabled,
  295. * writer searches for representation in a pack file. If possible,
  296. * compressed data is directly copied from such a pack file. Data checksum
  297. * is verified.
  298. * <p>
  299. * Default setting: {@value #DEFAULT_REUSE_OBJECTS}
  300. * </p>
  301. *
  302. * @param reuseObjects
  303. * boolean indicating whether or not writer should reuse existing
  304. * objects representation.
  305. */
  306. public void setReuseObjects(boolean reuseObjects) {
  307. this.reuseObjects = reuseObjects;
  308. }
  309. /**
  310. * Check whether writer can store delta base as an offset (new style
  311. * reducing pack size) or should store it as an object id (legacy style,
  312. * compatible with old readers).
  313. * <p>
  314. * Default setting: {@value #DEFAULT_DELTA_BASE_AS_OFFSET}
  315. * </p>
  316. *
  317. * @return true if delta base is stored as an offset; false if it is stored
  318. * as an object id.
  319. */
  320. public boolean isDeltaBaseAsOffset() {
  321. return deltaBaseAsOffset;
  322. }
  323. /**
  324. * Set writer delta base format. Delta base can be written as an offset in a
  325. * pack file (new approach reducing file size) or as an object id (legacy
  326. * approach, compatible with old readers).
  327. * <p>
  328. * Default setting: {@value #DEFAULT_DELTA_BASE_AS_OFFSET}
  329. * </p>
  330. *
  331. * @param deltaBaseAsOffset
  332. * boolean indicating whether delta base can be stored as an
  333. * offset.
  334. */
  335. public void setDeltaBaseAsOffset(boolean deltaBaseAsOffset) {
  336. this.deltaBaseAsOffset = deltaBaseAsOffset;
  337. }
  338. /**
  339. * Check whether the writer will create new deltas on the fly.
  340. * <p>
  341. * Default setting: true
  342. * </p>
  343. *
  344. * @return true if the writer will create a new delta when either
  345. * {@link #isReuseDeltas()} is false, or no suitable delta is
  346. * available for reuse.
  347. */
  348. public boolean isDeltaCompress() {
  349. return deltaCompress;
  350. }
  351. /**
  352. * Set whether or not the writer will create new deltas on the fly.
  353. *
  354. * @param deltaCompress
  355. * true to create deltas when {@link #isReuseDeltas()} is false,
  356. * or when a suitable delta isn't available for reuse. Set to
  357. * false to write whole objects instead.
  358. */
  359. public void setDeltaCompress(boolean deltaCompress) {
  360. this.deltaCompress = deltaCompress;
  361. }
  362. /**
  363. * Get maximum depth of delta chain set up for this writer. Generated chains
  364. * are not longer than this value.
  365. * <p>
  366. * Default setting: {@value #DEFAULT_MAX_DELTA_DEPTH}
  367. * </p>
  368. *
  369. * @return maximum delta chain depth.
  370. */
  371. public int getMaxDeltaDepth() {
  372. return maxDeltaDepth;
  373. }
  374. /**
  375. * Set up maximum depth of delta chain for this writer. Generated chains are
  376. * not longer than this value. Too low value causes low compression level,
  377. * while too big makes unpacking (reading) longer.
  378. * <p>
  379. * Default setting: {@value #DEFAULT_MAX_DELTA_DEPTH}
  380. * </p>
  381. *
  382. * @param maxDeltaDepth
  383. * maximum delta chain depth.
  384. */
  385. public void setMaxDeltaDepth(int maxDeltaDepth) {
  386. this.maxDeltaDepth = maxDeltaDepth;
  387. }
  388. /**
  389. * Get the number of objects to try when looking for a delta base.
  390. *
  391. * @return the object count to be searched.
  392. */
  393. public int getDeltaSearchWindowSize() {
  394. return deltaSearchWindowSize;
  395. }
  396. /**
  397. * Set the number of objects considered when searching for a delta base.
  398. * <p>
  399. * Default setting: {@value #DEFAULT_DELTA_SEARCH_WINDOW_SIZE}
  400. * </p>
  401. *
  402. * @param objectCount
  403. * number of objects to search at once. Must be at least 2.
  404. */
  405. public void setDeltaSearchWindowSize(int objectCount) {
  406. if (objectCount <= 2)
  407. setDeltaCompress(false);
  408. else
  409. deltaSearchWindowSize = objectCount;
  410. }
  411. /**
  412. * Get the maximum file size that will be delta compressed.
  413. * <p>
  414. * Files bigger than this setting will not be delta compressed, as they are
  415. * more than likely already highly compressed binary data files that do not
  416. * delta compress well, such as MPEG videos.
  417. *
  418. * @return the configured big file threshold.
  419. */
  420. public long getBigFileThreshold() {
  421. return bigFileThreshold;
  422. }
  423. /**
  424. * Set the maximum file size that should be considered for deltas.
  425. *
  426. * @param bigFileThreshold
  427. * the limit, in bytes.
  428. */
  429. public void setBigFileThreshold(long bigFileThreshold) {
  430. this.bigFileThreshold = bigFileThreshold;
  431. }
  432. /** @return true if this writer is producing a thin pack. */
  433. public boolean isThin() {
  434. return thin;
  435. }
  436. /**
  437. * @param packthin
  438. * a boolean indicating whether writer may pack objects with
  439. * delta base object not within set of objects to pack, but
  440. * belonging to party repository (uninteresting/boundary) as
  441. * determined by set; this kind of pack is used only for
  442. * transport; true - to produce thin pack, false - otherwise.
  443. */
  444. public void setThin(final boolean packthin) {
  445. thin = packthin;
  446. }
  447. /**
  448. * @return true to ignore objects that are uninteresting and also not found
  449. * on local disk; false to throw a {@link MissingObjectException}
  450. * out of {@link #preparePack(ProgressMonitor, Collection, Collection)} if an
  451. * uninteresting object is not in the source repository. By default,
  452. * true, permitting gracefully ignoring of uninteresting objects.
  453. */
  454. public boolean isIgnoreMissingUninteresting() {
  455. return ignoreMissingUninteresting;
  456. }
  457. /**
  458. * @param ignore
  459. * true if writer should ignore non existing uninteresting
  460. * objects during construction set of objects to pack; false
  461. * otherwise - non existing uninteresting objects may cause
  462. * {@link MissingObjectException}
  463. */
  464. public void setIgnoreMissingUninteresting(final boolean ignore) {
  465. ignoreMissingUninteresting = ignore;
  466. }
  467. /**
  468. * Set the pack index file format version this instance will create.
  469. *
  470. * @param version
  471. * the version to write. The special version 0 designates the
  472. * oldest (most compatible) format available for the objects.
  473. * @see PackIndexWriter
  474. */
  475. public void setIndexVersion(final int version) {
  476. indexVersion = version;
  477. }
  478. /**
  479. * Returns objects number in a pack file that was created by this writer.
  480. *
  481. * @return number of objects in pack.
  482. */
  483. public int getObjectsNumber() {
  484. return objectsMap.size();
  485. }
  486. /**
  487. * Prepare the list of objects to be written to the pack stream.
  488. * <p>
  489. * Iterator <b>exactly</b> determines which objects are included in a pack
  490. * and order they appear in pack (except that objects order by type is not
  491. * needed at input). This order should conform general rules of ordering
  492. * objects in git - by recency and path (type and delta-base first is
  493. * internally secured) and responsibility for guaranteeing this order is on
  494. * a caller side. Iterator must return each id of object to write exactly
  495. * once.
  496. * </p>
  497. * <p>
  498. * When iterator returns object that has {@link RevFlag#UNINTERESTING} flag,
  499. * this object won't be included in an output pack. Instead, it is recorded
  500. * as edge-object (known to remote repository) for thin-pack. In such a case
  501. * writer may pack objects with delta base object not within set of objects
  502. * to pack, but belonging to party repository - those marked with
  503. * {@link RevFlag#UNINTERESTING} flag. This type of pack is used only for
  504. * transport.
  505. * </p>
  506. *
  507. * @param objectsSource
  508. * iterator of object to store in a pack; order of objects within
  509. * each type is important, ordering by type is not needed;
  510. * allowed types for objects are {@link Constants#OBJ_COMMIT},
  511. * {@link Constants#OBJ_TREE}, {@link Constants#OBJ_BLOB} and
  512. * {@link Constants#OBJ_TAG}; objects returned by iterator may
  513. * be later reused by caller as object id and type are internally
  514. * copied in each iteration; if object returned by iterator has
  515. * {@link RevFlag#UNINTERESTING} flag set, it won't be included
  516. * in a pack, but is considered as edge-object for thin-pack.
  517. * @throws IOException
  518. * when some I/O problem occur during reading objects.
  519. */
  520. public void preparePack(final Iterator<RevObject> objectsSource)
  521. throws IOException {
  522. while (objectsSource.hasNext()) {
  523. addObject(objectsSource.next());
  524. }
  525. }
  526. /**
  527. * Prepare the list of objects to be written to the pack stream.
  528. * <p>
  529. * Basing on these 2 sets, another set of objects to put in a pack file is
  530. * created: this set consists of all objects reachable (ancestors) from
  531. * interesting objects, except uninteresting objects and their ancestors.
  532. * This method uses class {@link ObjectWalk} extensively to find out that
  533. * appropriate set of output objects and their optimal order in output pack.
  534. * Order is consistent with general git in-pack rules: sort by object type,
  535. * recency, path and delta-base first.
  536. * </p>
  537. *
  538. * @param countingMonitor
  539. * progress during object enumeration.
  540. * @param interestingObjects
  541. * collection of objects to be marked as interesting (start
  542. * points of graph traversal).
  543. * @param uninterestingObjects
  544. * collection of objects to be marked as uninteresting (end
  545. * points of graph traversal).
  546. * @throws IOException
  547. * when some I/O problem occur during reading objects.
  548. */
  549. public void preparePack(ProgressMonitor countingMonitor,
  550. final Collection<? extends ObjectId> interestingObjects,
  551. final Collection<? extends ObjectId> uninterestingObjects)
  552. throws IOException {
  553. if (countingMonitor == null)
  554. countingMonitor = NullProgressMonitor.INSTANCE;
  555. ObjectWalk walker = setUpWalker(interestingObjects,
  556. uninterestingObjects);
  557. findObjectsToPack(countingMonitor, walker);
  558. }
  559. /**
  560. * Determine if the pack file will contain the requested object.
  561. *
  562. * @param id
  563. * the object to test the existence of.
  564. * @return true if the object will appear in the output pack file.
  565. */
  566. public boolean willInclude(final AnyObjectId id) {
  567. return objectsMap.get(id) != null;
  568. }
  569. /**
  570. * Computes SHA-1 of lexicographically sorted objects ids written in this
  571. * pack, as used to name a pack file in repository.
  572. *
  573. * @return ObjectId representing SHA-1 name of a pack that was created.
  574. */
  575. public ObjectId computeName() {
  576. final byte[] buf = new byte[Constants.OBJECT_ID_LENGTH];
  577. final MessageDigest md = Constants.newMessageDigest();
  578. for (ObjectToPack otp : sortByName()) {
  579. otp.copyRawTo(buf, 0);
  580. md.update(buf, 0, Constants.OBJECT_ID_LENGTH);
  581. }
  582. return ObjectId.fromRaw(md.digest());
  583. }
  584. /**
  585. * Create an index file to match the pack file just written.
  586. * <p>
  587. * This method can only be invoked after {@link #preparePack(Iterator)} or
  588. * {@link #preparePack(ProgressMonitor, Collection, Collection)} has been
  589. * invoked and completed successfully. Writing a corresponding index is an
  590. * optional feature that not all pack users may require.
  591. *
  592. * @param indexStream
  593. * output for the index data. Caller is responsible for closing
  594. * this stream.
  595. * @throws IOException
  596. * the index data could not be written to the supplied stream.
  597. */
  598. public void writeIndex(final OutputStream indexStream) throws IOException {
  599. final List<ObjectToPack> list = sortByName();
  600. final PackIndexWriter iw;
  601. if (indexVersion <= 0)
  602. iw = PackIndexWriter.createOldestPossible(indexStream, list);
  603. else
  604. iw = PackIndexWriter.createVersion(indexStream, indexVersion);
  605. iw.write(list, packcsum);
  606. }
  607. private List<ObjectToPack> sortByName() {
  608. if (sortedByName == null) {
  609. sortedByName = new ArrayList<ObjectToPack>(objectsMap.size());
  610. for (List<ObjectToPack> list : objectsLists) {
  611. for (ObjectToPack otp : list)
  612. sortedByName.add(otp);
  613. }
  614. Collections.sort(sortedByName);
  615. }
  616. return sortedByName;
  617. }
  618. /**
  619. * Write the prepared pack to the supplied stream.
  620. * <p>
  621. * At first, this method collects and sorts objects to pack, then deltas
  622. * search is performed if set up accordingly, finally pack stream is
  623. * written. {@link ProgressMonitor} tasks {@value #COMPRESSING_OBJECTS_PROGRESS}
  624. * (only if reuseDeltas or reuseObjects is enabled) and
  625. * {@value #WRITING_OBJECTS_PROGRESS} are updated during packing.
  626. * </p>
  627. * <p>
  628. * All reused objects data checksum (Adler32/CRC32) is computed and
  629. * validated against existing checksum.
  630. * </p>
  631. *
  632. * @param compressMonitor
  633. * progress monitor to report object compression work.
  634. * @param writeMonitor
  635. * progress monitor to report the number of objects written.
  636. * @param packStream
  637. * output stream of pack data. The stream should be buffered by
  638. * the caller. The caller is responsible for closing the stream.
  639. * @throws IOException
  640. * an error occurred reading a local object's data to include in
  641. * the pack, or writing compressed object data to the output
  642. * stream.
  643. */
  644. public void writePack(ProgressMonitor compressMonitor,
  645. ProgressMonitor writeMonitor, OutputStream packStream)
  646. throws IOException {
  647. if (compressMonitor == null)
  648. compressMonitor = NullProgressMonitor.INSTANCE;
  649. if (writeMonitor == null)
  650. writeMonitor = NullProgressMonitor.INSTANCE;
  651. if ((reuseDeltas || reuseObjects) && reuseSupport != null)
  652. searchForReuse();
  653. final PackOutputStream out = new PackOutputStream(writeMonitor,
  654. packStream, isDeltaBaseAsOffset());
  655. writeMonitor.beginTask(WRITING_OBJECTS_PROGRESS, getObjectsNumber());
  656. out.writeFileHeader(PACK_VERSION_GENERATED, getObjectsNumber());
  657. writeObjects(writeMonitor, out);
  658. writeChecksum(out);
  659. reader.release();
  660. writeMonitor.endTask();
  661. }
  662. /** Release all resources used by this writer. */
  663. public void release() {
  664. reader.release();
  665. if (myDeflater != null) {
  666. myDeflater.end();
  667. myDeflater = null;
  668. }
  669. }
  670. private void searchForReuse() throws IOException {
  671. for (List<ObjectToPack> list : objectsLists) {
  672. for (ObjectToPack otp : list)
  673. reuseSupport.selectObjectRepresentation(this, otp);
  674. }
  675. }
  676. private void writeObjects(ProgressMonitor writeMonitor, PackOutputStream out)
  677. throws IOException {
  678. for (List<ObjectToPack> list : objectsLists) {
  679. for (ObjectToPack otp : list) {
  680. if (writeMonitor.isCancelled())
  681. throw new IOException(
  682. JGitText.get().packingCancelledDuringObjectsWriting);
  683. if (!otp.isWritten())
  684. writeObject(out, otp);
  685. }
  686. }
  687. }
  688. private void writeObject(PackOutputStream out, final ObjectToPack otp)
  689. throws IOException {
  690. if (otp.isWritten())
  691. return; // We shouldn't be here.
  692. otp.markWantWrite();
  693. if (otp.isDeltaRepresentation())
  694. writeBaseFirst(out, otp);
  695. out.resetCRC32();
  696. otp.setOffset(out.length());
  697. while (otp.isReuseAsIs()) {
  698. try {
  699. reuseSupport.copyObjectAsIs(out, otp);
  700. out.endObject();
  701. otp.setCRC(out.getCRC32());
  702. return;
  703. } catch (StoredObjectRepresentationNotAvailableException gone) {
  704. if (otp.getOffset() == out.length()) {
  705. redoSearchForReuse(otp);
  706. continue;
  707. } else {
  708. // Object writing already started, we cannot recover.
  709. //
  710. CorruptObjectException coe;
  711. coe = new CorruptObjectException(otp, "");
  712. coe.initCause(gone);
  713. throw coe;
  714. }
  715. }
  716. }
  717. // If we reached here, reuse wasn't possible.
  718. //
  719. writeWholeObjectDeflate(out, otp);
  720. out.endObject();
  721. otp.setCRC(out.getCRC32());
  722. }
  723. private void writeBaseFirst(PackOutputStream out, final ObjectToPack otp)
  724. throws IOException {
  725. ObjectToPack baseInPack = otp.getDeltaBase();
  726. if (baseInPack != null) {
  727. if (!baseInPack.isWritten()) {
  728. if (baseInPack.wantWrite()) {
  729. // There is a cycle. Our caller is trying to write the
  730. // object we want as a base, and called us. Turn off
  731. // delta reuse so we can find another form.
  732. //
  733. reuseDeltas = false;
  734. redoSearchForReuse(otp);
  735. reuseDeltas = true;
  736. } else {
  737. writeObject(out, baseInPack);
  738. }
  739. }
  740. } else if (!thin) {
  741. // This should never occur, the base isn't in the pack and
  742. // the pack isn't allowed to reference base outside objects.
  743. // Write the object as a whole form, even if that is slow.
  744. //
  745. otp.clearDeltaBase();
  746. otp.clearReuseAsIs();
  747. }
  748. }
  749. private void redoSearchForReuse(final ObjectToPack otp) throws IOException,
  750. MissingObjectException {
  751. otp.clearDeltaBase();
  752. otp.clearReuseAsIs();
  753. reuseSupport.selectObjectRepresentation(this, otp);
  754. }
  755. private void writeWholeObjectDeflate(PackOutputStream out,
  756. final ObjectToPack otp) throws IOException {
  757. final Deflater deflater = deflater();
  758. final ObjectLoader ldr = reader.open(otp, otp.getType());
  759. out.writeHeader(otp, ldr.getSize());
  760. deflater.reset();
  761. DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater);
  762. ldr.copyTo(dst);
  763. dst.finish();
  764. }
  765. private Deflater deflater() {
  766. if (myDeflater == null)
  767. myDeflater = new Deflater(compressionLevel);
  768. return myDeflater;
  769. }
  770. private void writeChecksum(PackOutputStream out) throws IOException {
  771. packcsum = out.getDigest();
  772. out.write(packcsum);
  773. }
  774. private ObjectWalk setUpWalker(
  775. final Collection<? extends ObjectId> interestingObjects,
  776. final Collection<? extends ObjectId> uninterestingObjects)
  777. throws MissingObjectException, IOException,
  778. IncorrectObjectTypeException {
  779. final ObjectWalk walker = new ObjectWalk(reader);
  780. walker.setRetainBody(false);
  781. walker.sort(RevSort.COMMIT_TIME_DESC);
  782. if (thin)
  783. walker.sort(RevSort.BOUNDARY, true);
  784. for (ObjectId id : interestingObjects) {
  785. RevObject o = walker.parseAny(id);
  786. walker.markStart(o);
  787. }
  788. if (uninterestingObjects != null) {
  789. for (ObjectId id : uninterestingObjects) {
  790. final RevObject o;
  791. try {
  792. o = walker.parseAny(id);
  793. } catch (MissingObjectException x) {
  794. if (ignoreMissingUninteresting)
  795. continue;
  796. throw x;
  797. }
  798. walker.markUninteresting(o);
  799. }
  800. }
  801. return walker;
  802. }
  803. private void findObjectsToPack(final ProgressMonitor countingMonitor,
  804. final ObjectWalk walker) throws MissingObjectException,
  805. IncorrectObjectTypeException, IOException {
  806. countingMonitor.beginTask(COUNTING_OBJECTS_PROGRESS,
  807. ProgressMonitor.UNKNOWN);
  808. RevObject o;
  809. while ((o = walker.next()) != null) {
  810. addObject(o, 0);
  811. countingMonitor.update(1);
  812. }
  813. while ((o = walker.nextObject()) != null) {
  814. addObject(o, walker.getPathHashCode());
  815. countingMonitor.update(1);
  816. }
  817. countingMonitor.endTask();
  818. }
  819. /**
  820. * Include one object to the output file.
  821. * <p>
  822. * Objects are written in the order they are added. If the same object is
  823. * added twice, it may be written twice, creating a larger than necessary
  824. * file.
  825. *
  826. * @param object
  827. * the object to add.
  828. * @throws IncorrectObjectTypeException
  829. * the object is an unsupported type.
  830. */
  831. public void addObject(final RevObject object)
  832. throws IncorrectObjectTypeException {
  833. addObject(object, 0);
  834. }
  835. private void addObject(final RevObject object, final int pathHashCode)
  836. throws IncorrectObjectTypeException {
  837. if (object.has(RevFlag.UNINTERESTING)) {
  838. switch (object.getType()) {
  839. case Constants.OBJ_TREE:
  840. case Constants.OBJ_BLOB:
  841. ObjectToPack otp = new ObjectToPack(object);
  842. otp.setPathHash(pathHashCode);
  843. otp.setDoNotDelta(true);
  844. edgeObjects.add(otp);
  845. thin = true;
  846. break;
  847. }
  848. return;
  849. }
  850. final ObjectToPack otp;
  851. if (reuseSupport != null)
  852. otp = reuseSupport.newObjectToPack(object);
  853. else
  854. otp = new ObjectToPack(object);
  855. otp.setPathHash(pathHashCode);
  856. try {
  857. objectsLists[object.getType()].add(otp);
  858. } catch (ArrayIndexOutOfBoundsException x) {
  859. throw new IncorrectObjectTypeException(object,
  860. JGitText.get().incorrectObjectType_COMMITnorTREEnorBLOBnorTAG);
  861. } catch (UnsupportedOperationException x) {
  862. // index pointing to "dummy" empty list
  863. throw new IncorrectObjectTypeException(object,
  864. JGitText.get().incorrectObjectType_COMMITnorTREEnorBLOBnorTAG);
  865. }
  866. objectsMap.add(otp);
  867. }
  868. /**
  869. * Select an object representation for this writer.
  870. * <p>
  871. * An {@link ObjectReader} implementation should invoke this method once for
  872. * each representation available for an object, to allow the writer to find
  873. * the most suitable one for the output.
  874. *
  875. * @param otp
  876. * the object being packed.
  877. * @param next
  878. * the next available representation from the repository.
  879. */
  880. public void select(ObjectToPack otp, StoredObjectRepresentation next) {
  881. int nFmt = next.getFormat();
  882. int nWeight;
  883. if (otp.isReuseAsIs()) {
  884. // We've already chosen to reuse a packed form, if next
  885. // cannot beat that break out early.
  886. //
  887. if (PACK_WHOLE < nFmt)
  888. return; // next isn't packed
  889. else if (PACK_DELTA < nFmt && otp.isDeltaRepresentation())
  890. return; // next isn't a delta, but we are
  891. nWeight = next.getWeight();
  892. if (otp.getWeight() <= nWeight)
  893. return; // next would be bigger
  894. } else
  895. nWeight = next.getWeight();
  896. if (nFmt == PACK_DELTA && reuseDeltas) {
  897. ObjectId baseId = next.getDeltaBase();
  898. ObjectToPack ptr = objectsMap.get(baseId);
  899. if (ptr != null) {
  900. otp.setDeltaBase(ptr);
  901. otp.setReuseAsIs();
  902. otp.setWeight(nWeight);
  903. } else if (thin && edgeObjects.contains(baseId)) {
  904. otp.setDeltaBase(baseId);
  905. otp.setReuseAsIs();
  906. otp.setWeight(nWeight);
  907. } else {
  908. otp.clearDeltaBase();
  909. otp.clearReuseAsIs();
  910. }
  911. } else if (nFmt == PACK_WHOLE && reuseObjects) {
  912. otp.clearDeltaBase();
  913. otp.setReuseAsIs();
  914. otp.setWeight(nWeight);
  915. } else {
  916. otp.clearDeltaBase();
  917. otp.clearReuseAsIs();
  918. }
  919. otp.select(next);
  920. }
  921. }