You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PackWriter.java 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039
  1. /*
  2. * Copyright (C) 2008-2010, Google Inc.
  3. * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.lib;
  45. import java.io.IOException;
  46. import java.io.OutputStream;
  47. import java.security.MessageDigest;
  48. import java.util.ArrayList;
  49. import java.util.Collection;
  50. import java.util.Collections;
  51. import java.util.Iterator;
  52. import java.util.List;
  53. import java.util.zip.Deflater;
  54. import org.eclipse.jgit.errors.IncorrectObjectTypeException;
  55. import org.eclipse.jgit.errors.MissingObjectException;
  56. import org.eclipse.jgit.revwalk.ObjectWalk;
  57. import org.eclipse.jgit.revwalk.RevFlag;
  58. import org.eclipse.jgit.revwalk.RevObject;
  59. import org.eclipse.jgit.revwalk.RevSort;
  60. import org.eclipse.jgit.transport.PackedObjectInfo;
  61. import org.eclipse.jgit.util.NB;
  62. /**
  63. * <p>
  64. * PackWriter class is responsible for generating pack files from a specified
  65. * set of objects from a repository. This implementation produces pack files in
  66. * format version 2.
  67. * </p>
  68. * <p>
  69. * Source of objects may be specified in two ways:
  70. * <ul>
  71. * <li>(usually) by providing sets of interesting and uninteresting objects in
  72. * repository - all interesting objects and their ancestors except uninteresting
  73. * objects and their ancestors will be included in pack, or</li>
  74. * <li>by providing iterator of {@link RevObject} specifying exact list and
  75. * order of objects in pack</li>
  76. * </ul>
  77. * Typical usage consists of creating instance intended for some pack,
  78. * configuring options, preparing the list of objects by calling
  79. * {@link #preparePack(Iterator)} or
  80. * {@link #preparePack(Collection, Collection)}, and finally
  81. * producing the stream with {@link #writePack(OutputStream)}.
  82. * </p>
  83. * <p>
  84. * This class provides a set of configurable options and {@link ProgressMonitor}
  85. * support, as operations may take a long time for big repositories. The delta
  86. * search algorithm is <b>NOT IMPLEMENTED</b> yet - this implementation
  87. * relies only on delta and object reuse.
  88. * </p>
  89. * <p>
  90. * This class is not thread safe, it is intended to be used in one thread, with
  91. * one instance per created pack. Subsequent calls to writePack result in
  92. * undefined behavior.
  93. * </p>
  94. */
  95. public class PackWriter {
  96. /**
  97. * Title of {@link ProgressMonitor} task used during counting objects to
  98. * pack.
  99. *
  100. * @see #preparePack(Collection, Collection)
  101. */
  102. public static final String COUNTING_OBJECTS_PROGRESS = "Counting objects";
  103. /**
  104. * Title of {@link ProgressMonitor} task used during searching for objects
  105. * reuse or delta reuse.
  106. *
  107. * @see #writePack(OutputStream)
  108. */
  109. public static final String SEARCHING_REUSE_PROGRESS = "Compressing objects";
  110. /**
  111. * Title of {@link ProgressMonitor} task used during writing out pack
  112. * (objects)
  113. *
  114. * @see #writePack(OutputStream)
  115. */
  116. public static final String WRITING_OBJECTS_PROGRESS = "Writing objects";
  117. /**
  118. * Default value of deltas reuse option.
  119. *
  120. * @see #setReuseDeltas(boolean)
  121. */
  122. public static final boolean DEFAULT_REUSE_DELTAS = true;
  123. /**
  124. * Default value of objects reuse option.
  125. *
  126. * @see #setReuseObjects(boolean)
  127. */
  128. public static final boolean DEFAULT_REUSE_OBJECTS = true;
  129. /**
  130. * Default value of delta base as offset option.
  131. *
  132. * @see #setDeltaBaseAsOffset(boolean)
  133. */
  134. public static final boolean DEFAULT_DELTA_BASE_AS_OFFSET = false;
  135. /**
  136. * Default value of maximum delta chain depth.
  137. *
  138. * @see #setMaxDeltaDepth(int)
  139. */
  140. public static final int DEFAULT_MAX_DELTA_DEPTH = 50;
  // Pack format version number written into the pack header by writeHeader().
  141. private static final int PACK_VERSION_GENERATED = 2;
  // Objects queued for packing, one list per object type constant. The
  // array is iterated in index order by sortByName(), searchForReuse() and
  // writeObjects(). Slot 0 holds an immutable empty list.
  142. @SuppressWarnings("unchecked")
  143. private final List<ObjectToPack> objectsLists[] = new List[Constants.OBJ_TAG + 1];
  144. {
  145. objectsLists[0] = Collections.<ObjectToPack> emptyList();
  146. objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
  147. objectsLists[Constants.OBJ_TREE] = new ArrayList<ObjectToPack>();
  148. objectsLists[Constants.OBJ_BLOB] = new ArrayList<ObjectToPack>();
  149. objectsLists[Constants.OBJ_TAG] = new ArrayList<ObjectToPack>();
  150. }
  // Id-keyed lookup of the objects to pack; its size is the object count
  // reported by getObjectsNumber() and written into the pack header.
  151. private final ObjectIdSubclassMap<ObjectToPack> objectsMap = new ObjectIdSubclassMap<ObjectToPack>();
  152. // edge objects for thin packs
  // (consulted as acceptable delta bases by selectDeltaReuseForObject() when
  // the thin flag is set)
  153. private final ObjectIdSubclassMap<ObjectId> edgeObjects = new ObjectIdSubclassMap<ObjectId>();
  // Repository the objects are read from; assigned in the constructor.
  154. private final Repository db;
  // Destination stream wrapper; created at the start of writePack().
  155. private PackOutputStream out;
  // Compressor used by writeWholeObjectDeflate(); its level comes from the
  // repository configuration.
  156. private final Deflater deflater;
  // Progress monitor used while searching for reusable representations.
  157. private ProgressMonitor initMonitor;
  // Progress monitor used while writing objects to the pack stream.
  158. private ProgressMonitor writeMonitor;
  // Shared scratch buffer for headers, raw object ids and deflate output.
  159. private final byte[] buf = new byte[16384]; // 16 KB
  // Cursor passed to the repository/loader calls; released by writePack().
  160. private final WindowCursor windowCursor = new WindowCursor();
  // Lazily built cache of all objects in sorted order; see sortByName().
  161. private List<ObjectToPack> sortedByName;
  // Digest of the written pack, set by writeChecksum() and handed to the
  // index writer by writeIndex().
  162. private byte packcsum[];
  163. private boolean reuseDeltas = DEFAULT_REUSE_DELTAS;
  164. private boolean reuseObjects = DEFAULT_REUSE_OBJECTS;
  165. private boolean deltaBaseAsOffset = DEFAULT_DELTA_BASE_AS_OFFSET;
  166. private int maxDeltaDepth = DEFAULT_MAX_DELTA_DEPTH;
  // Pack index format version used by writeIndex(); values <= 0 select the
  // oldest possible format.
  167. private int outputVersion;
  168. private boolean thin;
  169. private boolean ignoreMissingUninteresting = true;
  170. /**
  171. * Create writer for specified repository.
  172. * <p>
  173. * Objects for packing are specified in {@link #preparePack(Iterator)} or
  174. * {@link #preparePack(Collection, Collection)}.
  175. *
  176. * @param repo
  177. * repository where objects are stored.
  178. * @param monitor
  179. * operations progress monitor, used within
  180. * {@link #preparePack(Iterator)},
  181. * {@link #preparePack(Collection, Collection)}
  182. * , or {@link #writePack(OutputStream)}.
  183. */
  184. public PackWriter(final Repository repo, final ProgressMonitor monitor) {
  // Delegate to the two-monitor constructor, using the same monitor for
  // both the preparation phase and the writing phase.
  185. this(repo, monitor, monitor);
  186. }
  187. /**
  188. * Create writer for specified repository.
  189. * <p>
  190. * Objects for packing are specified in {@link #preparePack(Iterator)} or
  191. * {@link #preparePack(Collection, Collection)}.
  192. *
  193. * @param repo
  194. * repository where objects are stored.
  195. * @param imonitor
  196. * operations progress monitor, used within
  197. * {@link #preparePack(Iterator)},
  198. * {@link #preparePack(Collection, Collection)}
  199. * @param wmonitor
  200. * operations progress monitor, used within
  201. * {@link #writePack(OutputStream)}.
  202. */
  203. public PackWriter(final Repository repo, final ProgressMonitor imonitor,
  204. final ProgressMonitor wmonitor) {
  205. this.db = repo;
  206. initMonitor = imonitor == null ? NullProgressMonitor.INSTANCE : imonitor;
  207. writeMonitor = wmonitor == null ? NullProgressMonitor.INSTANCE : wmonitor;
  208. this.deflater = new Deflater(db.getConfig().getCore().getCompression());
  209. outputVersion = repo.getConfig().getCore().getPackIndexVersion();
  210. }
  211. /**
  212. * Check whether object is configured to reuse deltas existing in
  213. * repository.
  214. * <p>
  215. * Default setting: {@value #DEFAULT_REUSE_DELTAS}
  216. * </p>
  217. *
  218. * @return true if object is configured to reuse deltas; false otherwise.
  219. */
  220. public boolean isReuseDeltas() {
  221. return reuseDeltas;
  222. }
  223. /**
  224. * Set reuse deltas configuration option for this writer. When enabled,
  225. * writer will search for delta representation of object in repository and
  226. * use it if possible. Normally, only deltas with base to another object
  227. * existing in set of objects to pack will be used. Exception is however
  228. * thin-pack (see
  229. * {@link #preparePack(Collection, Collection)} and
  230. * {@link #preparePack(Iterator)}) where base object must exist on other
  231. * side machine.
  232. * <p>
  233. * When raw delta data is directly copied from a pack file, checksum is
  234. * computed to verify data.
  235. * </p>
  236. * <p>
  237. * Default setting: {@value #DEFAULT_REUSE_DELTAS}
  238. * </p>
  239. *
  240. * @param reuseDeltas
  241. * boolean indicating whether or not try to reuse deltas.
  242. */
  243. public void setReuseDeltas(boolean reuseDeltas) {
  // Stored as-is; writePack() and the per-object reuse search read this
  // flag to decide whether delta reuse is attempted.
  244. this.reuseDeltas = reuseDeltas;
  245. }
  246. /**
  247. * Checks whether object is configured to reuse existing objects
  248. * representation in repository.
  249. * <p>
  250. * Default setting: {@value #DEFAULT_REUSE_OBJECTS}
  251. * </p>
  252. *
  253. * @return true if writer is configured to reuse objects representation from
  254. * pack; false otherwise.
  255. */
  256. public boolean isReuseObjects() {
  257. return reuseObjects;
  258. }
  259. /**
  260. * Set reuse objects configuration option for this writer. If enabled,
  261. * writer searches for representation in a pack file. If possible,
  262. * compressed data is directly copied from such a pack file. Data checksum
  263. * is verified.
  264. * <p>
  265. * Default setting: {@value #DEFAULT_REUSE_OBJECTS}
  266. * </p>
  267. *
  268. * @param reuseObjects
  269. * boolean indicating whether or not writer should reuse existing
  270. * objects representation.
  271. */
  272. public void setReuseObjects(boolean reuseObjects) {
  // Stored as-is; writePack() and the per-object reuse search read this
  // flag to decide whether whole-object reuse is attempted.
  273. this.reuseObjects = reuseObjects;
  274. }
  275. /**
  276. * Check whether writer can store delta base as an offset (new style
  277. * reducing pack size) or should store it as an object id (legacy style,
  278. * compatible with old readers).
  279. * <p>
  280. * Default setting: {@value #DEFAULT_DELTA_BASE_AS_OFFSET}
  281. * </p>
  282. *
  283. * @return true if delta base is stored as an offset; false if it is stored
  284. * as an object id.
  285. */
  286. public boolean isDeltaBaseAsOffset() {
  287. return deltaBaseAsOffset;
  288. }
  289. /**
  290. * Set writer delta base format. Delta base can be written as an offset in a
  291. * pack file (new approach reducing file size) or as an object id (legacy
  292. * approach, compatible with old readers).
  293. * <p>
  294. * Default setting: {@value #DEFAULT_DELTA_BASE_AS_OFFSET}
  295. * </p>
  296. *
  297. * @param deltaBaseAsOffset
  298. * boolean indicating whether delta base can be stored as an
  299. * offset.
  300. */
  301. public void setDeltaBaseAsOffset(boolean deltaBaseAsOffset) {
  // Stored as-is; writeDeltaObjectHeader() reads this flag when choosing
  // between the offset and the object id form of a delta header.
  302. this.deltaBaseAsOffset = deltaBaseAsOffset;
  303. }
  304. /**
  305. * Get maximum depth of delta chain set up for this writer. Generated chains
  306. * are not longer than this value.
  307. * <p>
  308. * Default setting: {@value #DEFAULT_MAX_DELTA_DEPTH}
  309. * </p>
  310. *
  311. * @return maximum delta chain depth.
  312. */
  313. public int getMaxDeltaDepth() {
  314. return maxDeltaDepth;
  315. }
  316. /**
  317. * Set up maximum depth of delta chain for this writer. Generated chains are
  318. * not longer than this value. Too low value causes low compression level,
  319. * while too big makes unpacking (reading) longer.
  320. * <p>
  321. * Default setting: {@value #DEFAULT_MAX_DELTA_DEPTH}
  322. * </p>
  323. *
  324. * @param maxDeltaDepth
  325. * maximum delta chain depth.
  326. */
  327. public void setMaxDeltaDepth(int maxDeltaDepth) {
  // The value is stored without any range validation.
  328. this.maxDeltaDepth = maxDeltaDepth;
  329. }
  330. /** @return true if this writer is producing a thin pack. */
  331. public boolean isThin() {
  332. return thin;
  333. }
  334. /**
  335. * @param packthin
  336. * a boolean indicating whether writer may pack objects with
  337. * delta base object not within set of objects to pack, but
  338. * belonging to party repository (uninteresting/boundary) as
  339. * determined by set; this kind of pack is used only for
  340. * transport; true - to produce thin pack, false - otherwise.
  341. */
  342. public void setThin(final boolean packthin) {
  343. thin = packthin;
  344. }
  345. /**
  346. * @return true to ignore objects that are uninteresting and also not found
  347. * on local disk; false to throw a {@link MissingObjectException}
  348. * out of {@link #preparePack(Collection, Collection)} if an
  349. * uninteresting object is not in the source repository. By default,
  350. * true, permitting gracefully ignoring of uninteresting objects.
  351. */
  352. public boolean isIgnoreMissingUninteresting() {
  353. return ignoreMissingUninteresting;
  354. }
  355. /**
  356. * @param ignore
  357. * true if writer should ignore non existing uninteresting
  358. * objects during construction set of objects to pack; false
  359. * otherwise - non existing uninteresting objects may cause
  360. * {@link MissingObjectException}
  361. */
  362. public void setIgnoreMissingUninteresting(final boolean ignore) {
  363. ignoreMissingUninteresting = ignore;
  364. }
  365. /**
  366. * Set the pack index file format version this instance will create.
  367. *
  368. * @param version
  369. * the version to write. The special version 0 designates the
  370. * oldest (most compatible) format available for the objects.
  371. * @see PackIndexWriter
  372. */
  373. public void setIndexVersion(final int version) {
  374. outputVersion = version;
  375. }
  376. /**
  377. * Returns objects number in a pack file that was created by this writer.
  378. *
  379. * @return number of objects in pack.
  380. */
  381. public int getObjectsNumber() {
  382. return objectsMap.size();
  383. }
  384. /**
  385. * Prepare the list of objects to be written to the pack stream.
  386. * <p>
  387. * Iterator <b>exactly</b> determines which objects are included in a pack
  388. * and order they appear in pack (except that objects order by type is not
  389. * needed at input). This order should conform general rules of ordering
  390. * objects in git - by recency and path (type and delta-base first is
  391. * internally secured) and responsibility for guaranteeing this order is on
  392. * a caller side. Iterator must return each id of object to write exactly
  393. * once.
  394. * </p>
  395. * <p>
  396. * When iterator returns object that has {@link RevFlag#UNINTERESTING} flag,
  397. * this object won't be included in an output pack. Instead, it is recorded
  398. * as edge-object (known to remote repository) for thin-pack. In such a case
  399. * writer may pack objects with delta base object not within set of objects
  400. * to pack, but belonging to party repository - those marked with
  401. * {@link RevFlag#UNINTERESTING} flag. This type of pack is used only for
  402. * transport.
  403. * </p>
  404. *
  405. * @param objectsSource
  406. * iterator of object to store in a pack; order of objects within
  407. * each type is important, ordering by type is not needed;
  408. * allowed types for objects are {@link Constants#OBJ_COMMIT},
  409. * {@link Constants#OBJ_TREE}, {@link Constants#OBJ_BLOB} and
  410. * {@link Constants#OBJ_TAG}; objects returned by iterator may
  411. * be later reused by caller as object id and type are internally
  412. * copied in each iteration; if object returned by iterator has
  413. * {@link RevFlag#UNINTERESTING} flag set, it won't be included
  414. * in a pack, but is considered as edge-object for thin-pack.
  415. * @throws IOException
  416. * when some I/O problem occur during reading objects.
  417. */
  418. public void preparePack(final Iterator<RevObject> objectsSource)
  419. throws IOException {
  420. while (objectsSource.hasNext()) {
  421. addObject(objectsSource.next());
  422. }
  423. }
  424. /**
  425. * Prepare the list of objects to be written to the pack stream.
  426. * <p>
  427. * Basing on these 2 sets, another set of objects to put in a pack file is
  428. * created: this set consists of all objects reachable (ancestors) from
  429. * interesting objects, except uninteresting objects and their ancestors.
  430. * This method uses class {@link ObjectWalk} extensively to find out that
  431. * appropriate set of output objects and their optimal order in output pack.
  432. * Order is consistent with general git in-pack rules: sort by object type,
  433. * recency, path and delta-base first.
  434. * </p>
  435. *
  436. * @param interestingObjects
  437. * collection of objects to be marked as interesting (start
  438. * points of graph traversal).
  439. * @param uninterestingObjects
  440. * collection of objects to be marked as uninteresting (end
  441. * points of graph traversal).
  442. * @throws IOException
  443. * when some I/O problem occur during reading objects.
  444. */
  445. public void preparePack(
  446. final Collection<? extends ObjectId> interestingObjects,
  447. final Collection<? extends ObjectId> uninterestingObjects)
  448. throws IOException {
  449. ObjectWalk walker = setUpWalker(interestingObjects,
  450. uninterestingObjects);
  451. findObjectsToPack(walker);
  452. }
  453. /**
  454. * Determine if the pack file will contain the requested object.
  455. *
  456. * @param id
  457. * the object to test the existence of.
  458. * @return true if the object will appear in the output pack file.
  459. */
  460. public boolean willInclude(final AnyObjectId id) {
  461. return objectsMap.get(id) != null;
  462. }
  463. /**
  464. * Computes SHA-1 of lexicographically sorted objects ids written in this
  465. * pack, as used to name a pack file in repository.
  466. *
  467. * @return ObjectId representing SHA-1 name of a pack that was created.
  468. */
  469. public ObjectId computeName() {
  470. final MessageDigest md = Constants.newMessageDigest();
  471. for (ObjectToPack otp : sortByName()) {
  472. otp.copyRawTo(buf, 0);
  473. md.update(buf, 0, Constants.OBJECT_ID_LENGTH);
  474. }
  475. return ObjectId.fromRaw(md.digest());
  476. }
  477. /**
  478. * Create an index file to match the pack file just written.
  479. * <p>
  480. * This method can only be invoked after {@link #preparePack(Iterator)} or
  481. * {@link #preparePack(Collection, Collection)} has been
  482. * invoked and completed successfully. Writing a corresponding index is an
  483. * optional feature that not all pack users may require.
  484. *
  485. * @param indexStream
  486. * output for the index data. Caller is responsible for closing
  487. * this stream.
  488. * @throws IOException
  489. * the index data could not be written to the supplied stream.
  490. */
  491. public void writeIndex(final OutputStream indexStream) throws IOException {
  492. final List<ObjectToPack> list = sortByName();
  493. final PackIndexWriter iw;
  494. if (outputVersion <= 0)
  495. iw = PackIndexWriter.createOldestPossible(indexStream, list);
  496. else
  497. iw = PackIndexWriter.createVersion(indexStream, outputVersion);
  498. iw.write(list, packcsum);
  499. }
  500. private List<ObjectToPack> sortByName() {
  501. if (sortedByName == null) {
  502. sortedByName = new ArrayList<ObjectToPack>(objectsMap.size());
  503. for (List<ObjectToPack> list : objectsLists) {
  504. for (ObjectToPack otp : list)
  505. sortedByName.add(otp);
  506. }
  507. Collections.sort(sortedByName);
  508. }
  509. return sortedByName;
  510. }
  511. /**
  512. * Write the prepared pack to the supplied stream.
  513. * <p>
  514. * At first, this method collects and sorts objects to pack, then deltas
  515. * search is performed if set up accordingly, finally pack stream is
  516. * written. {@link ProgressMonitor} tasks {@value #SEARCHING_REUSE_PROGRESS}
  517. * (only if reuseDeltas or reuseObjects is enabled) and
  518. * {@value #WRITING_OBJECTS_PROGRESS} are updated during packing.
  519. * </p>
  520. * <p>
  521. * All reused objects data checksum (Adler32/CRC32) is computed and
  522. * validated against existing checksum.
  523. * </p>
  524. *
  525. * @param packStream
  526. * output stream of pack data. The stream should be buffered by
  527. * the caller. The caller is responsible for closing the stream.
  528. * @throws IOException
  529. * an error occurred reading a local object's data to include in
  530. * the pack, or writing compressed object data to the output
  531. * stream.
  532. */
  533. public void writePack(OutputStream packStream) throws IOException {
  534. if (reuseDeltas || reuseObjects)
  535. searchForReuse();
  536. out = new PackOutputStream(packStream);
  537. writeMonitor.beginTask(WRITING_OBJECTS_PROGRESS, getObjectsNumber());
  538. writeHeader();
  539. writeObjects();
  540. writeChecksum();
  541. windowCursor.release();
  542. writeMonitor.endTask();
  543. }
  544. private void searchForReuse() throws IOException {
  545. initMonitor.beginTask(SEARCHING_REUSE_PROGRESS, getObjectsNumber());
  546. final Collection<PackedObjectLoader> reuseLoaders = new ArrayList<PackedObjectLoader>();
  547. for (List<ObjectToPack> list : objectsLists) {
  548. for (ObjectToPack otp : list) {
  549. if (initMonitor.isCancelled())
  550. throw new IOException(
  551. "Packing cancelled during objects writing");
  552. reuseLoaders.clear();
  553. searchForReuse(reuseLoaders, otp);
  554. initMonitor.update(1);
  555. }
  556. }
  557. initMonitor.endTask();
  558. }
  559. private void searchForReuse(
  560. final Collection<PackedObjectLoader> reuseLoaders,
  561. final ObjectToPack otp) throws IOException {
  562. db.openObjectInAllPacks(otp, reuseLoaders, windowCursor);
  563. if (reuseDeltas) {
  564. selectDeltaReuseForObject(otp, reuseLoaders);
  565. }
  566. // delta reuse is preferred over object reuse
  567. if (reuseObjects && !otp.isCopyable()) {
  568. selectObjectReuseForObject(otp, reuseLoaders);
  569. }
  570. }
  571. private void selectDeltaReuseForObject(final ObjectToPack otp,
  572. final Collection<PackedObjectLoader> loaders) throws IOException {
  573. PackedObjectLoader bestLoader = null;
  574. ObjectId bestBase = null;
  575. for (PackedObjectLoader loader : loaders) {
  576. ObjectId idBase = loader.getDeltaBase();
  577. if (idBase == null)
  578. continue;
  579. ObjectToPack otpBase = objectsMap.get(idBase);
  580. // only if base is in set of objects to write or thin-pack's edge
  581. if ((otpBase != null || (thin && edgeObjects.get(idBase) != null))
  582. // select smallest possible delta if > 1 available
  583. && isBetterDeltaReuseLoader(bestLoader, loader)) {
  584. bestLoader = loader;
  585. bestBase = (otpBase != null ? otpBase : idBase);
  586. }
  587. }
  588. if (bestLoader != null) {
  589. otp.setCopyFromPack(bestLoader);
  590. otp.setDeltaBase(bestBase);
  591. }
  592. }
  593. private static boolean isBetterDeltaReuseLoader(
  594. PackedObjectLoader currentLoader, PackedObjectLoader loader)
  595. throws IOException {
  596. if (currentLoader == null)
  597. return true;
  598. if (loader.getRawSize() < currentLoader.getRawSize())
  599. return true;
  600. return (loader.getRawSize() == currentLoader.getRawSize()
  601. && loader.supportsFastCopyRawData() && !currentLoader
  602. .supportsFastCopyRawData());
  603. }
  604. private void selectObjectReuseForObject(final ObjectToPack otp,
  605. final Collection<PackedObjectLoader> loaders) {
  606. for (final PackedObjectLoader loader : loaders) {
  607. if (loader instanceof WholePackedObjectLoader) {
  608. otp.setCopyFromPack(loader);
  609. return;
  610. }
  611. }
  612. }
  613. private void writeHeader() throws IOException {
  614. System.arraycopy(Constants.PACK_SIGNATURE, 0, buf, 0, 4);
  615. NB.encodeInt32(buf, 4, PACK_VERSION_GENERATED);
  616. NB.encodeInt32(buf, 8, getObjectsNumber());
  617. out.write(buf, 0, 12);
  618. }
  619. private void writeObjects() throws IOException {
  620. for (List<ObjectToPack> list : objectsLists) {
  621. for (ObjectToPack otp : list) {
  622. if (writeMonitor.isCancelled())
  623. throw new IOException(
  624. "Packing cancelled during objects writing");
  625. if (!otp.isWritten())
  626. writeObject(otp);
  627. }
  628. }
  629. }
  /**
   * Write a single object to the pack stream, first writing its delta base
   * when that base is part of this pack and has not been written yet.
   * <p>
   * The object is emitted either by copying an existing pack representation
   * (see open(ObjectToPack)) or by deflating its full content. Its offset
   * and CRC are recorded on the ObjectToPack, and the write monitor is
   * advanced by one.
   *
   * @param otp
   *            the object to write; must not have been written already.
   * @throws IOException
   *             the object could not be read or written, or a delta
   *             representation is required but no reusable one is available.
   */
  630. private void writeObject(final ObjectToPack otp) throws IOException {
  // Flag the object as "write in progress" so a delta chain that leads
  // back to it is recognised as a cycle by wantWrite() below.
  631. otp.markWantWrite();
  632. if (otp.isDeltaRepresentation()) {
  633. ObjectToPack deltaBase = otp.getDeltaBase();
  // A null base is only tolerated for thin packs.
  // NOTE(review): presumably the base is then an edge object that is not
  // part of this pack - confirm against ObjectToPack.getDeltaBase().
  634. assert deltaBase != null || thin;
  635. if (deltaBase != null && !deltaBase.isWritten()) {
  636. if (deltaBase.wantWrite()) {
  // The base is itself waiting on this write: drop the delta and the
  // selected source pack so the object is not written as this delta.
  637. otp.clearDeltaBase(); // cycle detected
  638. otp.clearSourcePack();
  639. } else {
  // Recursively write the base before the delta that refers to it.
  640. writeObject(deltaBase);
  641. }
  642. }
  643. }
  644. assert !otp.isWritten();
  // Reset the stream CRC before this object so the value read back at
  // the end reflects only what is written from here on.
  645. out.resetCRC32();
  // Record the current stream length as this object's offset.
  646. otp.setOffset(out.length());
  // open() returns null when no reusable pack representation is left.
  647. final PackedObjectLoader reuse = open(otp);
  648. if (reuse != null) {
  649. try {
  650. if (otp.isDeltaRepresentation())
  651. writeDeltaObjectHeader(otp, reuse);
  652. else
  653. writeObjectHeader(otp.getType(), reuse.getSize());
  654. reuse.copyRawData(out, buf, windowCursor);
  655. } finally {
  // Always pair with the beginCopyRawData() issued inside open().
  656. reuse.endCopyRawData();
  657. }
  658. } else if (otp.isDeltaRepresentation()) {
  // Deltas can only be copied from existing packs, never created here.
  659. throw new IOException("creating deltas is not implemented");
  660. } else {
  661. writeWholeObjectDeflate(otp);
  662. }
  663. otp.setCRC(out.getCRC32());
  664. writeMonitor.update(1);
  665. }
  666. private PackedObjectLoader open(final ObjectToPack otp) throws IOException {
  667. while (otp.isCopyable()) {
  668. try {
  669. PackedObjectLoader reuse = otp.getCopyLoader(windowCursor);
  670. reuse.beginCopyRawData();
  671. return reuse;
  672. } catch (IOException err) {
  673. // The pack we found the object in originally is gone, or
  674. // it has been overwritten with a different layout.
  675. //
  676. otp.clearDeltaBase();
  677. otp.clearSourcePack();
  678. searchForReuse(new ArrayList<PackedObjectLoader>(), otp);
  679. continue;
  680. }
  681. }
  682. return null;
  683. }
  684. private void writeWholeObjectDeflate(final ObjectToPack otp)
  685. throws IOException {
  686. final ObjectLoader loader = db.openObject(windowCursor, otp);
  687. final byte[] data = loader.getCachedBytes();
  688. writeObjectHeader(otp.getType(), data.length);
  689. deflater.reset();
  690. deflater.setInput(data, 0, data.length);
  691. deflater.finish();
  692. do {
  693. final int n = deflater.deflate(buf, 0, buf.length);
  694. if (n > 0)
  695. out.write(buf, 0, n);
  696. } while (!deflater.finished());
  697. }
  698. private void writeDeltaObjectHeader(final ObjectToPack otp,
  699. final PackedObjectLoader reuse) throws IOException {
  700. if (deltaBaseAsOffset && otp.getDeltaBase() != null) {
  701. writeObjectHeader(Constants.OBJ_OFS_DELTA, reuse.getRawSize());
  702. final ObjectToPack deltaBase = otp.getDeltaBase();
  703. long offsetDiff = otp.getOffset() - deltaBase.getOffset();
  704. int pos = buf.length - 1;
  705. buf[pos] = (byte) (offsetDiff & 0x7F);
  706. while ((offsetDiff >>= 7) > 0) {
  707. buf[--pos] = (byte) (0x80 | (--offsetDiff & 0x7F));
  708. }
  709. out.write(buf, pos, buf.length - pos);
  710. } else {
  711. writeObjectHeader(Constants.OBJ_REF_DELTA, reuse.getRawSize());
  712. otp.getDeltaBaseId().copyRawTo(buf, 0);
  713. out.write(buf, 0, Constants.OBJECT_ID_LENGTH);
  714. }
  715. }
  716. private void writeObjectHeader(final int objectType, long dataLength)
  717. throws IOException {
  718. long nextLength = dataLength >>> 4;
  719. int size = 0;
  720. buf[size++] = (byte) ((nextLength > 0 ? 0x80 : 0x00)
  721. | (objectType << 4) | (dataLength & 0x0F));
  722. dataLength = nextLength;
  723. while (dataLength > 0) {
  724. nextLength >>>= 7;
  725. buf[size++] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (dataLength & 0x7F));
  726. dataLength = nextLength;
  727. }
  728. out.write(buf, 0, size);
  729. }
  730. private void writeChecksum() throws IOException {
  731. packcsum = out.getDigest();
  732. out.write(packcsum);
  733. }
  734. private ObjectWalk setUpWalker(
  735. final Collection<? extends ObjectId> interestingObjects,
  736. final Collection<? extends ObjectId> uninterestingObjects)
  737. throws MissingObjectException, IOException,
  738. IncorrectObjectTypeException {
  739. final ObjectWalk walker = new ObjectWalk(db);
  740. walker.setRetainBody(false);
  741. walker.sort(RevSort.TOPO);
  742. walker.sort(RevSort.COMMIT_TIME_DESC, true);
  743. if (thin)
  744. walker.sort(RevSort.BOUNDARY, true);
  745. for (ObjectId id : interestingObjects) {
  746. RevObject o = walker.parseAny(id);
  747. walker.markStart(o);
  748. }
  749. if (uninterestingObjects != null) {
  750. for (ObjectId id : uninterestingObjects) {
  751. final RevObject o;
  752. try {
  753. o = walker.parseAny(id);
  754. } catch (MissingObjectException x) {
  755. if (ignoreMissingUninteresting)
  756. continue;
  757. throw x;
  758. }
  759. walker.markUninteresting(o);
  760. }
  761. }
  762. return walker;
  763. }
  764. private void findObjectsToPack(final ObjectWalk walker)
  765. throws MissingObjectException, IncorrectObjectTypeException,
  766. IOException {
  767. initMonitor.beginTask(COUNTING_OBJECTS_PROGRESS,
  768. ProgressMonitor.UNKNOWN);
  769. RevObject o;
  770. while ((o = walker.next()) != null) {
  771. addObject(o);
  772. initMonitor.update(1);
  773. }
  774. while ((o = walker.nextObject()) != null) {
  775. addObject(o);
  776. initMonitor.update(1);
  777. }
  778. initMonitor.endTask();
  779. }
  780. /**
  781. * Include one object to the output file.
  782. * <p>
  783. * Objects are written in the order they are added. If the same object is
  784. * added twice, it may be written twice, creating a larger than necessary
  785. * file.
  786. *
  787. * @param object
  788. * the object to add.
  789. * @throws IncorrectObjectTypeException
  790. * the object is an unsupported type.
  791. */
  792. public void addObject(final RevObject object)
  793. throws IncorrectObjectTypeException {
  794. if (object.has(RevFlag.UNINTERESTING)) {
  795. edgeObjects.add(object);
  796. thin = true;
  797. return;
  798. }
  799. final ObjectToPack otp = new ObjectToPack(object, object.getType());
  800. try {
  801. objectsLists[object.getType()].add(otp);
  802. } catch (ArrayIndexOutOfBoundsException x) {
  803. throw new IncorrectObjectTypeException(object,
  804. "COMMIT nor TREE nor BLOB nor TAG");
  805. } catch (UnsupportedOperationException x) {
  806. // index pointing to "dummy" empty list
  807. throw new IncorrectObjectTypeException(object,
  808. "COMMIT nor TREE nor BLOB nor TAG");
  809. }
  810. objectsMap.add(otp);
  811. }
  812. /**
  813. * Class holding information about object that is going to be packed by
  814. * {@link PackWriter}. Information include object representation in a
  815. * pack-file and object status.
  816. *
  817. */
  818. static class ObjectToPack extends PackedObjectInfo {
  819. /** Other object being packed that this will delta against. */
  820. private ObjectId deltaBase;
  821. /** Pack to reuse compressed data from, otherwise null. */
  822. private PackFile copyFromPack;
  823. /** Offset of the object's header in {@link #copyFromPack}. */
  824. private long copyOffset;
  825. /**
  826. * Bit field, from bit 0 to bit 31:
  827. * <ul>
  828. * <li>1 bit: wantWrite</li>
  829. * <li>3 bits: type</li>
  830. * <li>28 bits: deltaDepth</li>
  831. * </ul>
  832. */
  833. private int flags;
  834. /**
  835. * Construct object for specified object id. <br/> By default object is
  836. * marked as not written and non-delta packed (as a whole object).
  837. *
  838. * @param src
  839. * object id of object for packing
  840. * @param type
  841. * real type code of the object, not its in-pack type.
  842. */
  843. ObjectToPack(AnyObjectId src, final int type) {
  844. super(src);
  845. flags |= type << 1;
  846. }
  847. /**
  848. * @return delta base object id if object is going to be packed in delta
  849. * representation; null otherwise - if going to be packed as a
  850. * whole object.
  851. */
  852. ObjectId getDeltaBaseId() {
  853. return deltaBase;
  854. }
  855. /**
  856. * @return delta base object to pack if object is going to be packed in
  857. * delta representation and delta is specified as object to
  858. * pack; null otherwise - if going to be packed as a whole
  859. * object or delta base is specified only as id.
  860. */
  861. ObjectToPack getDeltaBase() {
  862. if (deltaBase instanceof ObjectToPack)
  863. return (ObjectToPack) deltaBase;
  864. return null;
  865. }
  866. /**
  867. * Set delta base for the object. Delta base set by this method is used
  868. * by {@link PackWriter} to write object - determines its representation
  869. * in a created pack.
  870. *
  871. * @param deltaBase
  872. * delta base object or null if object should be packed as a
  873. * whole object.
  874. *
  875. */
  876. void setDeltaBase(ObjectId deltaBase) {
  877. this.deltaBase = deltaBase;
  878. }
  879. void clearDeltaBase() {
  880. this.deltaBase = null;
  881. }
  882. /**
  883. * @return true if object is going to be written as delta; false
  884. * otherwise.
  885. */
  886. boolean isDeltaRepresentation() {
  887. return deltaBase != null;
  888. }
  889. /**
  890. * Check if object is already written in a pack. This information is
  891. * used to achieve delta-base precedence in a pack file.
  892. *
  893. * @return true if object is already written; false otherwise.
  894. */
  895. boolean isWritten() {
  896. return getOffset() != 0;
  897. }
  898. boolean isCopyable() {
  899. return copyFromPack != null;
  900. }
  901. PackedObjectLoader getCopyLoader(WindowCursor curs) throws IOException {
  902. return copyFromPack.resolveBase(curs, copyOffset);
  903. }
  904. void setCopyFromPack(PackedObjectLoader loader) {
  905. this.copyFromPack = loader.pack;
  906. this.copyOffset = loader.objectOffset;
  907. }
  908. void clearSourcePack() {
  909. copyFromPack = null;
  910. }
  911. int getType() {
  912. return (flags>>1) & 0x7;
  913. }
  914. int getDeltaDepth() {
  915. return flags >>> 4;
  916. }
  917. void updateDeltaDepth() {
  918. final int d;
  919. if (deltaBase instanceof ObjectToPack)
  920. d = ((ObjectToPack) deltaBase).getDeltaDepth() + 1;
  921. else if (deltaBase != null)
  922. d = 1;
  923. else
  924. d = 0;
  925. flags = (d << 4) | flags & 0x15;
  926. }
  927. boolean wantWrite() {
  928. return (flags & 1) == 1;
  929. }
  930. void markWantWrite() {
  931. flags |= 1;
  932. }
  933. }
  934. }