You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PackWriter.java 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039
  1. /*
  2. * Copyright (C) 2008-2010, Google Inc.
  3. * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.lib;
  45. import java.io.IOException;
  46. import java.io.OutputStream;
  47. import java.security.MessageDigest;
  48. import java.util.ArrayList;
  49. import java.util.Collection;
  50. import java.util.Collections;
  51. import java.util.Iterator;
  52. import java.util.List;
  53. import java.util.zip.Deflater;
  54. import org.eclipse.jgit.errors.IncorrectObjectTypeException;
  55. import org.eclipse.jgit.errors.MissingObjectException;
  56. import org.eclipse.jgit.revwalk.ObjectWalk;
  57. import org.eclipse.jgit.revwalk.RevFlag;
  58. import org.eclipse.jgit.revwalk.RevObject;
  59. import org.eclipse.jgit.revwalk.RevSort;
  60. import org.eclipse.jgit.transport.PackedObjectInfo;
  61. import org.eclipse.jgit.util.NB;
  62. /**
  63. * <p>
  64. * The PackWriter class is responsible for generating pack files from a specified
  65. * set of objects from a repository. This implementation produces pack files in
  66. * format version 2.
  67. * </p>
  68. * <p>
  69. * Source of objects may be specified in two ways:
  70. * <ul>
  71. * <li>(usually) by providing sets of interesting and uninteresting objects in
  72. * repository - all interesting objects and their ancestors except uninteresting
  73. * objects and their ancestors will be included in pack, or</li>
  74. * <li>by providing iterator of {@link RevObject} specifying exact list and
  75. * order of objects in pack</li>
  76. * </ul>
  77. * Typical usage consists of creating instance intended for some pack,
  78. * configuring options, preparing the list of objects by calling
  79. * {@link #preparePack(Iterator)} or
  80. * {@link #preparePack(Collection, Collection)}, and finally
  81. * producing the stream with {@link #writePack(OutputStream)}.
  82. * </p>
  83. * <p>
  84. * This class provides a set of configurable options and {@link ProgressMonitor}
  85. * support, as operations may take a long time for big repositories. The delta
  86. * search algorithm is <b>NOT IMPLEMENTED</b> yet - this implementation
  87. * relies only on delta and object reuse.
  88. * </p>
  89. * <p>
  90. * This class is not thread safe, it is intended to be used in one thread, with
  91. * one instance per created pack. Subsequent calls to writePack result in
  92. * undefined behavior.
  93. * </p>
  94. */
  95. public class PackWriter {
  96. /**
  97. * Title of {@link ProgressMonitor} task used during counting objects to
  98. * pack.
  99. *
  100. * @see #preparePack(Collection, Collection)
  101. */
  102. public static final String COUNTING_OBJECTS_PROGRESS = "Counting objects";
  103. /**
  104. * Title of {@link ProgressMonitor} task used during searching for objects
  105. * reuse or delta reuse.
  106. *
  107. * @see #writePack(OutputStream)
  108. */
  109. public static final String SEARCHING_REUSE_PROGRESS = "Compressing objects";
  110. /**
  111. * Title of {@link ProgressMonitor} task used during writing out pack
  112. * (objects)
  113. *
  114. * @see #writePack(OutputStream)
  115. */
  116. public static final String WRITING_OBJECTS_PROGRESS = "Writing objects";
  117. /**
  118. * Default value of deltas reuse option.
  119. *
  120. * @see #setReuseDeltas(boolean)
  121. */
  122. public static final boolean DEFAULT_REUSE_DELTAS = true;
  123. /**
  124. * Default value of objects reuse option.
  125. *
  126. * @see #setReuseObjects(boolean)
  127. */
  128. public static final boolean DEFAULT_REUSE_OBJECTS = true;
  129. /**
  130. * Default value of delta base as offset option.
  131. *
  132. * @see #setDeltaBaseAsOffset(boolean)
  133. */
  134. public static final boolean DEFAULT_DELTA_BASE_AS_OFFSET = false;
  135. /**
  136. * Default value of maximum delta chain depth.
  137. *
  138. * @see #setMaxDeltaDepth(int)
  139. */
  140. public static final int DEFAULT_MAX_DELTA_DEPTH = 50;
  141. private static final int PACK_VERSION_GENERATED = 2;
  142. @SuppressWarnings("unchecked")
  143. private final List<ObjectToPack> objectsLists[] = new List[Constants.OBJ_TAG + 1];
  144. {
  145. objectsLists[0] = Collections.<ObjectToPack> emptyList();
  146. objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
  147. objectsLists[Constants.OBJ_TREE] = new ArrayList<ObjectToPack>();
  148. objectsLists[Constants.OBJ_BLOB] = new ArrayList<ObjectToPack>();
  149. objectsLists[Constants.OBJ_TAG] = new ArrayList<ObjectToPack>();
  150. }
  151. private final ObjectIdSubclassMap<ObjectToPack> objectsMap = new ObjectIdSubclassMap<ObjectToPack>();
  152. // edge objects for thin packs
  153. private final ObjectIdSubclassMap<ObjectId> edgeObjects = new ObjectIdSubclassMap<ObjectId>();
  154. private final Repository db;
  155. private PackOutputStream out;
  156. private final Deflater deflater;
  157. private ProgressMonitor initMonitor;
  158. private ProgressMonitor writeMonitor;
  159. private final byte[] buf = new byte[16384]; // 16 KB
  160. private final WindowCursor windowCursor = new WindowCursor();
  161. private List<ObjectToPack> sortedByName;
  162. private byte packcsum[];
  163. private boolean reuseDeltas = DEFAULT_REUSE_DELTAS;
  164. private boolean reuseObjects = DEFAULT_REUSE_OBJECTS;
  165. private boolean deltaBaseAsOffset = DEFAULT_DELTA_BASE_AS_OFFSET;
  166. private int maxDeltaDepth = DEFAULT_MAX_DELTA_DEPTH;
  167. private int outputVersion;
  168. private boolean thin;
  169. private boolean ignoreMissingUninteresting = true;
  170. /**
  171. * Create writer for specified repository.
  172. * <p>
  173. * Objects for packing are specified in {@link #preparePack(Iterator)} or
  174. * {@link #preparePack(Collection, Collection)}.
  175. *
  176. * @param repo
  177. * repository where objects are stored.
  178. * @param monitor
  179. * operations progress monitor, used within
  180. * {@link #preparePack(Iterator)},
  181. * {@link #preparePack(Collection, Collection)}
  182. * , or {@link #writePack(OutputStream)}.
  183. */
  public PackWriter(final Repository repo, final ProgressMonitor monitor) {
    // One monitor serves both phases: it is passed as the init monitor and the write monitor.
    this(repo, monitor, monitor);
  }
  187. /**
  188. * Create writer for specified repository.
  189. * <p>
  190. * Objects for packing are specified in {@link #preparePack(Iterator)} or
  191. * {@link #preparePack(Collection, Collection)}.
  192. *
  193. * @param repo
  194. * repository where objects are stored.
  195. * @param imonitor
  196. * operations progress monitor, used within
  197. * {@link #preparePack(Iterator)},
  198. * {@link #preparePack(Collection, Collection)}
  199. * @param wmonitor
  200. * operations progress monitor, used within
  201. * {@link #writePack(OutputStream)}.
  202. */
  203. public PackWriter(final Repository repo, final ProgressMonitor imonitor,
  204. final ProgressMonitor wmonitor) {
  205. this.db = repo;
  206. initMonitor = imonitor == null ? NullProgressMonitor.INSTANCE : imonitor;
  207. writeMonitor = wmonitor == null ? NullProgressMonitor.INSTANCE : wmonitor;
  208. this.deflater = new Deflater(db.getConfig().getCore().getCompression());
  209. outputVersion = repo.getConfig().getCore().getPackIndexVersion();
  210. }
  211. /**
  212. * Check whether object is configured to reuse deltas existing in
  213. * repository.
  214. * <p>
  215. * Default setting: {@value #DEFAULT_REUSE_DELTAS}
  216. * </p>
  217. *
  218. * @return true if object is configured to reuse deltas; false otherwise.
  219. */
  220. public boolean isReuseDeltas() {
  221. return reuseDeltas;
  222. }
  223. /**
  224. * Set reuse deltas configuration option for this writer. When enabled,
  225. * writer will search for delta representation of object in repository and
  226. * use it if possible. Normally, only deltas with base to another object
  227. * existing in set of objects to pack will be used. Exception is however
  228. * thin-pack (see
  229. * {@link #preparePack(Collection, Collection)} and
  230. * {@link #preparePack(Iterator)}) where base object must exist on other
  231. * side machine.
  232. * <p>
  233. * When raw delta data is directly copied from a pack file, checksum is
  234. * computed to verify data.
  235. * </p>
  236. * <p>
  237. * Default setting: {@value #DEFAULT_REUSE_DELTAS}
  238. * </p>
  239. *
  240. * @param reuseDeltas
  241. * boolean indicating whether or not try to reuse deltas.
  242. */
  243. public void setReuseDeltas(boolean reuseDeltas) {
  244. this.reuseDeltas = reuseDeltas;
  245. }
  246. /**
  247. * Checks whether object is configured to reuse existing objects
  248. * representation in repository.
  249. * <p>
  250. * Default setting: {@value #DEFAULT_REUSE_OBJECTS}
  251. * </p>
  252. *
  253. * @return true if writer is configured to reuse objects representation from
  254. * pack; false otherwise.
  255. */
  256. public boolean isReuseObjects() {
  257. return reuseObjects;
  258. }
  259. /**
  260. * Set reuse objects configuration option for this writer. If enabled,
  261. * writer searches for representation in a pack file. If possible,
  262. * compressed data is directly copied from such a pack file. Data checksum
  263. * is verified.
  264. * <p>
  265. * Default setting: {@value #DEFAULT_REUSE_OBJECTS}
  266. * </p>
  267. *
  268. * @param reuseObjects
  269. * boolean indicating whether or not writer should reuse existing
  270. * objects representation.
  271. */
  272. public void setReuseObjects(boolean reuseObjects) {
  273. this.reuseObjects = reuseObjects;
  274. }
  275. /**
  276. * Check whether writer can store delta base as an offset (new style
  277. * reducing pack size) or should store it as an object id (legacy style,
  278. * compatible with old readers).
  279. * <p>
  280. * Default setting: {@value #DEFAULT_DELTA_BASE_AS_OFFSET}
  281. * </p>
  282. *
  283. * @return true if delta base is stored as an offset; false if it is stored
  284. * as an object id.
  285. */
  286. public boolean isDeltaBaseAsOffset() {
  287. return deltaBaseAsOffset;
  288. }
  289. /**
  290. * Set writer delta base format. Delta base can be written as an offset in a
  291. * pack file (new approach reducing file size) or as an object id (legacy
  292. * approach, compatible with old readers).
  293. * <p>
  294. * Default setting: {@value #DEFAULT_DELTA_BASE_AS_OFFSET}
  295. * </p>
  296. *
  297. * @param deltaBaseAsOffset
  298. * boolean indicating whether delta base can be stored as an
  299. * offset.
  300. */
  301. public void setDeltaBaseAsOffset(boolean deltaBaseAsOffset) {
  302. this.deltaBaseAsOffset = deltaBaseAsOffset;
  303. }
  304. /**
  305. * Get maximum depth of delta chain set up for this writer. Generated chains
  306. * are not longer than this value.
  307. * <p>
  308. * Default setting: {@value #DEFAULT_MAX_DELTA_DEPTH}
  309. * </p>
  310. *
  311. * @return maximum delta chain depth.
  312. */
  313. public int getMaxDeltaDepth() {
  314. return maxDeltaDepth;
  315. }
  316. /**
  317. * Set up maximum depth of delta chain for this writer. Generated chains are
  318. * not longer than this value. Too low value causes low compression level,
  319. * while too big makes unpacking (reading) longer.
  320. * <p>
  321. * Default setting: {@value #DEFAULT_MAX_DELTA_DEPTH}
  322. * </p>
  323. *
  324. * @param maxDeltaDepth
  325. * maximum delta chain depth.
  326. */
  327. public void setMaxDeltaDepth(int maxDeltaDepth) {
  328. this.maxDeltaDepth = maxDeltaDepth;
  329. }
  330. /** @return true if this writer is producing a thin pack. */
  331. public boolean isThin() {
  332. return thin;
  333. }
  334. /**
  335. * @param packthin
  336. * a boolean indicating whether writer may pack objects with
  337. * delta base object not within set of objects to pack, but
  338. * belonging to party repository (uninteresting/boundary) as
  339. * determined by set; this kind of pack is used only for
  340. * transport; true - to produce thin pack, false - otherwise.
  341. */
  342. public void setThin(final boolean packthin) {
  343. thin = packthin;
  344. }
  345. /**
  346. * @return true to ignore objects that are uninteresting and also not found
  347. * on local disk; false to throw a {@link MissingObjectException}
  348. * out of {@link #preparePack(Collection, Collection)} if an
  349. * uninteresting object is not in the source repository. By default,
  350. * true, permitting gracefully ignoring of uninteresting objects.
  351. */
  352. public boolean isIgnoreMissingUninteresting() {
  353. return ignoreMissingUninteresting;
  354. }
  355. /**
  356. * @param ignore
  357. * true if writer should ignore non existing uninteresting
  358. * objects during construction set of objects to pack; false
  359. * otherwise - non existing uninteresting objects may cause
  360. * {@link MissingObjectException}
  361. */
  362. public void setIgnoreMissingUninteresting(final boolean ignore) {
  363. ignoreMissingUninteresting = ignore;
  364. }
  365. /**
  366. * Set the pack index file format version this instance will create.
  367. *
  368. * @param version
  369. * the version to write. The special version 0 designates the
  370. * oldest (most compatible) format available for the objects.
  371. * @see PackIndexWriter
  372. */
  373. public void setIndexVersion(final int version) {
  374. outputVersion = version;
  375. }
  376. /**
  377. * Returns objects number in a pack file that was created by this writer.
  378. *
  379. * @return number of objects in pack.
  380. */
  381. public int getObjectsNumber() {
  382. return objectsMap.size();
  383. }
  384. /**
  385. * Prepare the list of objects to be written to the pack stream.
  386. * <p>
  387. * Iterator <b>exactly</b> determines which objects are included in a pack
  388. * and order they appear in pack (except that objects order by type is not
  389. * needed at input). This order should conform general rules of ordering
  390. * objects in git - by recency and path (type and delta-base first is
  391. * internally secured) and responsibility for guaranteeing this order is on
  392. * a caller side. Iterator must return each id of object to write exactly
  393. * once.
  394. * </p>
  395. * <p>
  396. * When iterator returns object that has {@link RevFlag#UNINTERESTING} flag,
  397. * this object won't be included in an output pack. Instead, it is recorded
  398. * as edge-object (known to remote repository) for thin-pack. In such a case
  399. * writer may pack objects with delta base object not within set of objects
  400. * to pack, but belonging to party repository - those marked with
  401. * {@link RevFlag#UNINTERESTING} flag. This type of pack is used only for
  402. * transport.
  403. * </p>
  404. *
  405. * @param objectsSource
  406. * iterator of object to store in a pack; order of objects within
  407. * each type is important, ordering by type is not needed;
  408. * allowed types for objects are {@link Constants#OBJ_COMMIT},
  409. * {@link Constants#OBJ_TREE}, {@link Constants#OBJ_BLOB} and
  410. * {@link Constants#OBJ_TAG}; objects returned by iterator may
  411. * be later reused by caller as object id and type are internally
  412. * copied in each iteration; if object returned by iterator has
  413. * {@link RevFlag#UNINTERESTING} flag set, it won't be included
  414. * in a pack, but is considered as edge-object for thin-pack.
  415. * @throws IOException
  416. * when some I/O problem occur during reading objects.
  417. */
  418. public void preparePack(final Iterator<RevObject> objectsSource)
  419. throws IOException {
  420. while (objectsSource.hasNext()) {
  421. addObject(objectsSource.next());
  422. }
  423. }
  424. /**
  425. * Prepare the list of objects to be written to the pack stream.
  426. * <p>
  427. * Basing on these 2 sets, another set of objects to put in a pack file is
  428. * created: this set consists of all objects reachable (ancestors) from
  429. * interesting objects, except uninteresting objects and their ancestors.
  430. * This method uses class {@link ObjectWalk} extensively to find out that
  431. * appropriate set of output objects and their optimal order in output pack.
  432. * Order is consistent with general git in-pack rules: sort by object type,
  433. * recency, path and delta-base first.
  434. * </p>
  435. *
  436. * @param interestingObjects
  437. * collection of objects to be marked as interesting (start
  438. * points of graph traversal).
  439. * @param uninterestingObjects
  440. * collection of objects to be marked as uninteresting (end
  441. * points of graph traversal).
  442. * @throws IOException
  443. * when some I/O problem occur during reading objects.
  444. */
  445. public void preparePack(
  446. final Collection<? extends ObjectId> interestingObjects,
  447. final Collection<? extends ObjectId> uninterestingObjects)
  448. throws IOException {
  449. ObjectWalk walker = setUpWalker(interestingObjects,
  450. uninterestingObjects);
  451. findObjectsToPack(walker);
  452. }
  453. /**
  454. * Determine if the pack file will contain the requested object.
  455. *
  456. * @param id
  457. * the object to test the existence of.
  458. * @return true if the object will appear in the output pack file.
  459. */
  460. public boolean willInclude(final AnyObjectId id) {
  461. return objectsMap.get(id) != null;
  462. }
  463. /**
  464. * Computes SHA-1 of lexicographically sorted objects ids written in this
  465. * pack, as used to name a pack file in repository.
  466. *
  467. * @return ObjectId representing SHA-1 name of a pack that was created.
  468. */
  469. public ObjectId computeName() {
  470. final MessageDigest md = Constants.newMessageDigest();
  471. for (ObjectToPack otp : sortByName()) {
  472. otp.copyRawTo(buf, 0);
  473. md.update(buf, 0, Constants.OBJECT_ID_LENGTH);
  474. }
  475. return ObjectId.fromRaw(md.digest());
  476. }
  477. /**
  478. * Create an index file to match the pack file just written.
  479. * <p>
  480. * This method can only be invoked after {@link #preparePack(Iterator)} or
  481. * {@link #preparePack(Collection, Collection)} has been
  482. * invoked and completed successfully. Writing a corresponding index is an
  483. * optional feature that not all pack users may require.
  484. *
  485. * @param indexStream
  486. * output for the index data. Caller is responsible for closing
  487. * this stream.
  488. * @throws IOException
  489. * the index data could not be written to the supplied stream.
  490. */
  491. public void writeIndex(final OutputStream indexStream) throws IOException {
  492. final List<ObjectToPack> list = sortByName();
  493. final PackIndexWriter iw;
  494. if (outputVersion <= 0)
  495. iw = PackIndexWriter.createOldestPossible(indexStream, list);
  496. else
  497. iw = PackIndexWriter.createVersion(indexStream, outputVersion);
  498. iw.write(list, packcsum);
  499. }
  500. private List<ObjectToPack> sortByName() {
  501. if (sortedByName == null) {
  502. sortedByName = new ArrayList<ObjectToPack>(objectsMap.size());
  503. for (List<ObjectToPack> list : objectsLists) {
  504. for (ObjectToPack otp : list)
  505. sortedByName.add(otp);
  506. }
  507. Collections.sort(sortedByName);
  508. }
  509. return sortedByName;
  510. }
  511. /**
  512. * Write the prepared pack to the supplied stream.
  513. * <p>
  514. * At first, this method collects and sorts objects to pack, then deltas
  515. * search is performed if set up accordingly, finally pack stream is
  516. * written. {@link ProgressMonitor} tasks {@value #SEARCHING_REUSE_PROGRESS}
  517. * (only if reuseDeltas or reuseObjects is enabled) and
  518. * {@value #WRITING_OBJECTS_PROGRESS} are updated during packing.
  519. * </p>
  520. * <p>
  521. * All reused objects data checksum (Adler32/CRC32) is computed and
  522. * validated against existing checksum.
  523. * </p>
  524. *
  525. * @param packStream
  526. * output stream of pack data. The stream should be buffered by
  527. * the caller. The caller is responsible for closing the stream.
  528. * @throws IOException
  529. * an error occurred reading a local object's data to include in
  530. * the pack, or writing compressed object data to the output
  531. * stream.
  532. */
  533. public void writePack(OutputStream packStream) throws IOException {
  534. if (reuseDeltas || reuseObjects)
  535. searchForReuse();
  536. out = new PackOutputStream(packStream);
  537. writeMonitor.beginTask(WRITING_OBJECTS_PROGRESS, getObjectsNumber());
  538. writeHeader();
  539. writeObjects();
  540. writeChecksum();
  541. windowCursor.release();
  542. writeMonitor.endTask();
  543. }
  544. private void searchForReuse() throws IOException {
  545. initMonitor.beginTask(SEARCHING_REUSE_PROGRESS, getObjectsNumber());
  546. final Collection<PackedObjectLoader> reuseLoaders = new ArrayList<PackedObjectLoader>();
  547. for (List<ObjectToPack> list : objectsLists) {
  548. for (ObjectToPack otp : list) {
  549. if (initMonitor.isCancelled())
  550. throw new IOException(
  551. "Packing cancelled during objects writing");
  552. reuseLoaders.clear();
  553. searchForReuse(reuseLoaders, otp);
  554. initMonitor.update(1);
  555. }
  556. }
  557. initMonitor.endTask();
  558. }
  559. private void searchForReuse(
  560. final Collection<PackedObjectLoader> reuseLoaders,
  561. final ObjectToPack otp) throws IOException {
  562. db.openObjectInAllPacks(otp, reuseLoaders, windowCursor);
  563. if (reuseDeltas) {
  564. selectDeltaReuseForObject(otp, reuseLoaders);
  565. }
  566. // delta reuse is preferred over object reuse
  567. if (reuseObjects && !otp.hasReuseLoader()) {
  568. selectObjectReuseForObject(otp, reuseLoaders);
  569. }
  570. }
  571. private void selectDeltaReuseForObject(final ObjectToPack otp,
  572. final Collection<PackedObjectLoader> loaders) throws IOException {
  573. PackedObjectLoader bestLoader = null;
  574. ObjectId bestBase = null;
  575. for (PackedObjectLoader loader : loaders) {
  576. ObjectId idBase = loader.getDeltaBase();
  577. if (idBase == null)
  578. continue;
  579. ObjectToPack otpBase = objectsMap.get(idBase);
  580. // only if base is in set of objects to write or thin-pack's edge
  581. if ((otpBase != null || (thin && edgeObjects.get(idBase) != null))
  582. // select smallest possible delta if > 1 available
  583. && isBetterDeltaReuseLoader(bestLoader, loader)) {
  584. bestLoader = loader;
  585. bestBase = (otpBase != null ? otpBase : idBase);
  586. }
  587. }
  588. if (bestLoader != null) {
  589. otp.setReuseLoader(bestLoader);
  590. otp.setDeltaBase(bestBase);
  591. }
  592. }
  593. private static boolean isBetterDeltaReuseLoader(
  594. PackedObjectLoader currentLoader, PackedObjectLoader loader)
  595. throws IOException {
  596. if (currentLoader == null)
  597. return true;
  598. if (loader.getRawSize() < currentLoader.getRawSize())
  599. return true;
  600. return (loader.getRawSize() == currentLoader.getRawSize()
  601. && loader.supportsFastCopyRawData() && !currentLoader
  602. .supportsFastCopyRawData());
  603. }
  604. private void selectObjectReuseForObject(final ObjectToPack otp,
  605. final Collection<PackedObjectLoader> loaders) {
  606. for (final PackedObjectLoader loader : loaders) {
  607. if (loader instanceof WholePackedObjectLoader) {
  608. otp.setReuseLoader(loader);
  609. return;
  610. }
  611. }
  612. }
  613. private void writeHeader() throws IOException {
  614. System.arraycopy(Constants.PACK_SIGNATURE, 0, buf, 0, 4);
  615. NB.encodeInt32(buf, 4, PACK_VERSION_GENERATED);
  616. NB.encodeInt32(buf, 8, getObjectsNumber());
  617. out.write(buf, 0, 12);
  618. }
  619. private void writeObjects() throws IOException {
  620. for (List<ObjectToPack> list : objectsLists) {
  621. for (ObjectToPack otp : list) {
  622. if (writeMonitor.isCancelled())
  623. throw new IOException(
  624. "Packing cancelled during objects writing");
  625. if (!otp.isWritten())
  626. writeObject(otp);
  627. }
  628. }
  629. }
  630. private void writeObject(final ObjectToPack otp) throws IOException {
  631. otp.markWantWrite();
  632. if (otp.isDeltaRepresentation()) {
  633. ObjectToPack deltaBase = otp.getDeltaBase();
  634. assert deltaBase != null || thin;
  635. if (deltaBase != null && !deltaBase.isWritten()) {
  636. if (deltaBase.wantWrite()) {
  637. otp.clearDeltaBase(); // cycle detected
  638. otp.disposeLoader();
  639. } else {
  640. writeObject(deltaBase);
  641. }
  642. }
  643. }
  644. assert !otp.isWritten();
  645. out.resetCRC32();
  646. otp.setOffset(out.length());
  647. final PackedObjectLoader reuse = open(otp);
  648. if (reuse != null) {
  649. try {
  650. if (otp.isDeltaRepresentation()) {
  651. writeDeltaObjectReuse(otp, reuse);
  652. } else {
  653. writeObjectHeader(otp.getType(), reuse.getSize());
  654. reuse.copyRawData(out, buf, windowCursor);
  655. }
  656. } finally {
  657. reuse.endCopyRawData();
  658. }
  659. } else if (otp.isDeltaRepresentation()) {
  660. throw new IOException("creating deltas is not implemented");
  661. } else {
  662. writeWholeObjectDeflate(otp);
  663. }
  664. otp.setCRC(out.getCRC32());
  665. writeMonitor.update(1);
  666. }
  667. private PackedObjectLoader open(final ObjectToPack otp) throws IOException {
  668. for (;;) {
  669. PackedObjectLoader reuse = otp.useLoader();
  670. if (reuse == null) {
  671. return null;
  672. }
  673. try {
  674. reuse.beginCopyRawData();
  675. return reuse;
  676. } catch (IOException err) {
  677. // The pack we found the object in originally is gone, or
  678. // it has been overwritten with a different layout.
  679. //
  680. otp.clearDeltaBase();
  681. searchForReuse(new ArrayList<PackedObjectLoader>(), otp);
  682. continue;
  683. }
  684. }
  685. }
  686. private void writeWholeObjectDeflate(final ObjectToPack otp)
  687. throws IOException {
  688. final ObjectLoader loader = db.openObject(windowCursor, otp);
  689. final byte[] data = loader.getCachedBytes();
  690. writeObjectHeader(otp.getType(), data.length);
  691. deflater.reset();
  692. deflater.setInput(data, 0, data.length);
  693. deflater.finish();
  694. do {
  695. final int n = deflater.deflate(buf, 0, buf.length);
  696. if (n > 0)
  697. out.write(buf, 0, n);
  698. } while (!deflater.finished());
  699. }
  700. private void writeDeltaObjectReuse(final ObjectToPack otp,
  701. final PackedObjectLoader reuse) throws IOException {
  702. if (deltaBaseAsOffset && otp.getDeltaBase() != null) {
  703. writeObjectHeader(Constants.OBJ_OFS_DELTA, reuse.getRawSize());
  704. final ObjectToPack deltaBase = otp.getDeltaBase();
  705. long offsetDiff = otp.getOffset() - deltaBase.getOffset();
  706. int pos = buf.length - 1;
  707. buf[pos] = (byte) (offsetDiff & 0x7F);
  708. while ((offsetDiff >>= 7) > 0) {
  709. buf[--pos] = (byte) (0x80 | (--offsetDiff & 0x7F));
  710. }
  711. out.write(buf, pos, buf.length - pos);
  712. } else {
  713. writeObjectHeader(Constants.OBJ_REF_DELTA, reuse.getRawSize());
  714. otp.getDeltaBaseId().copyRawTo(buf, 0);
  715. out.write(buf, 0, Constants.OBJECT_ID_LENGTH);
  716. }
  717. reuse.copyRawData(out, buf, windowCursor);
  718. }
  719. private void writeObjectHeader(final int objectType, long dataLength)
  720. throws IOException {
  721. long nextLength = dataLength >>> 4;
  722. int size = 0;
  723. buf[size++] = (byte) ((nextLength > 0 ? 0x80 : 0x00)
  724. | (objectType << 4) | (dataLength & 0x0F));
  725. dataLength = nextLength;
  726. while (dataLength > 0) {
  727. nextLength >>>= 7;
  728. buf[size++] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (dataLength & 0x7F));
  729. dataLength = nextLength;
  730. }
  731. out.write(buf, 0, size);
  732. }
  733. private void writeChecksum() throws IOException {
  734. packcsum = out.getDigest();
  735. out.write(packcsum);
  736. }
  737. private ObjectWalk setUpWalker(
  738. final Collection<? extends ObjectId> interestingObjects,
  739. final Collection<? extends ObjectId> uninterestingObjects)
  740. throws MissingObjectException, IOException,
  741. IncorrectObjectTypeException {
  742. final ObjectWalk walker = new ObjectWalk(db);
  743. walker.setRetainBody(false);
  744. walker.sort(RevSort.TOPO);
  745. walker.sort(RevSort.COMMIT_TIME_DESC, true);
  746. if (thin)
  747. walker.sort(RevSort.BOUNDARY, true);
  748. for (ObjectId id : interestingObjects) {
  749. RevObject o = walker.parseAny(id);
  750. walker.markStart(o);
  751. }
  752. if (uninterestingObjects != null) {
  753. for (ObjectId id : uninterestingObjects) {
  754. final RevObject o;
  755. try {
  756. o = walker.parseAny(id);
  757. } catch (MissingObjectException x) {
  758. if (ignoreMissingUninteresting)
  759. continue;
  760. throw x;
  761. }
  762. walker.markUninteresting(o);
  763. }
  764. }
  765. return walker;
  766. }
  767. private void findObjectsToPack(final ObjectWalk walker)
  768. throws MissingObjectException, IncorrectObjectTypeException,
  769. IOException {
  770. initMonitor.beginTask(COUNTING_OBJECTS_PROGRESS,
  771. ProgressMonitor.UNKNOWN);
  772. RevObject o;
  773. while ((o = walker.next()) != null) {
  774. addObject(o);
  775. initMonitor.update(1);
  776. }
  777. while ((o = walker.nextObject()) != null) {
  778. addObject(o);
  779. initMonitor.update(1);
  780. }
  781. initMonitor.endTask();
  782. }
  783. /**
  784. * Include one object to the output file.
  785. * <p>
  786. * Objects are written in the order they are added. If the same object is
  787. * added twice, it may be written twice, creating a larger than necessary
  788. * file.
  789. *
  790. * @param object
  791. * the object to add.
  792. * @throws IncorrectObjectTypeException
  793. * the object is an unsupported type.
  794. */
  795. public void addObject(final RevObject object)
  796. throws IncorrectObjectTypeException {
  797. if (object.has(RevFlag.UNINTERESTING)) {
  798. edgeObjects.add(object);
  799. thin = true;
  800. return;
  801. }
  802. final ObjectToPack otp = new ObjectToPack(object, object.getType());
  803. try {
  804. objectsLists[object.getType()].add(otp);
  805. } catch (ArrayIndexOutOfBoundsException x) {
  806. throw new IncorrectObjectTypeException(object,
  807. "COMMIT nor TREE nor BLOB nor TAG");
  808. } catch (UnsupportedOperationException x) {
  809. // index pointing to "dummy" empty list
  810. throw new IncorrectObjectTypeException(object,
  811. "COMMIT nor TREE nor BLOB nor TAG");
  812. }
  813. objectsMap.add(otp);
  814. }
  815. /**
  816. * Class holding information about object that is going to be packed by
  817. * {@link PackWriter}. Information include object representation in a
  818. * pack-file and object status.
  819. *
  820. */
  821. static class ObjectToPack extends PackedObjectInfo {
  822. private ObjectId deltaBase;
  823. private PackedObjectLoader reuseLoader;
  824. /**
  825. * Bit field, from bit 0 to bit 31:
  826. * <ul>
  827. * <li>1 bit: wantWrite</li>
  828. * <li>3 bits: type</li>
  829. * <li>28 bits: deltaDepth</li>
  830. * </ul>
  831. */
  832. private int flags;
  833. /**
  834. * Construct object for specified object id. <br/> By default object is
  835. * marked as not written and non-delta packed (as a whole object).
  836. *
  837. * @param src
  838. * object id of object for packing
  839. * @param type
  840. * real type code of the object, not its in-pack type.
  841. */
  842. ObjectToPack(AnyObjectId src, final int type) {
  843. super(src);
  844. flags |= type << 1;
  845. }
  846. /**
  847. * @return delta base object id if object is going to be packed in delta
  848. * representation; null otherwise - if going to be packed as a
  849. * whole object.
  850. */
  851. ObjectId getDeltaBaseId() {
  852. return deltaBase;
  853. }
  854. /**
  855. * @return delta base object to pack if object is going to be packed in
  856. * delta representation and delta is specified as object to
  857. * pack; null otherwise - if going to be packed as a whole
  858. * object or delta base is specified only as id.
  859. */
  860. ObjectToPack getDeltaBase() {
  861. if (deltaBase instanceof ObjectToPack)
  862. return (ObjectToPack) deltaBase;
  863. return null;
  864. }
  865. /**
  866. * Set delta base for the object. Delta base set by this method is used
  867. * by {@link PackWriter} to write object - determines its representation
  868. * in a created pack.
  869. *
  870. * @param deltaBase
  871. * delta base object or null if object should be packed as a
  872. * whole object.
  873. *
  874. */
  875. void setDeltaBase(ObjectId deltaBase) {
  876. this.deltaBase = deltaBase;
  877. }
  878. void clearDeltaBase() {
  879. this.deltaBase = null;
  880. }
  881. /**
  882. * @return true if object is going to be written as delta; false
  883. * otherwise.
  884. */
  885. boolean isDeltaRepresentation() {
  886. return deltaBase != null;
  887. }
  888. /**
  889. * Check if object is already written in a pack. This information is
  890. * used to achieve delta-base precedence in a pack file.
  891. *
  892. * @return true if object is already written; false otherwise.
  893. */
  894. boolean isWritten() {
  895. return getOffset() != 0;
  896. }
  897. PackedObjectLoader useLoader() {
  898. final PackedObjectLoader r = reuseLoader;
  899. reuseLoader = null;
  900. return r;
  901. }
  902. boolean hasReuseLoader() {
  903. return reuseLoader != null;
  904. }
  905. void setReuseLoader(PackedObjectLoader reuseLoader) {
  906. this.reuseLoader = reuseLoader;
  907. }
  908. void disposeLoader() {
  909. this.reuseLoader = null;
  910. }
  911. int getType() {
  912. return (flags>>1) & 0x7;
  913. }
  914. int getDeltaDepth() {
  915. return flags >>> 4;
  916. }
  917. void updateDeltaDepth() {
  918. final int d;
  919. if (deltaBase instanceof ObjectToPack)
  920. d = ((ObjectToPack) deltaBase).getDeltaDepth() + 1;
  921. else if (deltaBase != null)
  922. d = 1;
  923. else
  924. d = 0;
  925. flags = (d << 4) | flags & 0x15;
  926. }
  927. boolean wantWrite() {
  928. return (flags & 1) == 1;
  929. }
  930. void markWantWrite() {
  931. flags |= 1;
  932. }
  933. }
  934. }