You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PackWriter.java 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. /*
  2. * Copyright (C) 2008-2010, Google Inc.
  3. * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.lib;
  45. import java.io.IOException;
  46. import java.io.OutputStream;
  47. import java.security.MessageDigest;
  48. import java.util.ArrayList;
  49. import java.util.Collection;
  50. import java.util.Collections;
  51. import java.util.Iterator;
  52. import java.util.List;
  53. import java.util.zip.Deflater;
  54. import org.eclipse.jgit.errors.IncorrectObjectTypeException;
  55. import org.eclipse.jgit.errors.MissingObjectException;
  56. import org.eclipse.jgit.revwalk.ObjectWalk;
  57. import org.eclipse.jgit.revwalk.RevFlag;
  58. import org.eclipse.jgit.revwalk.RevObject;
  59. import org.eclipse.jgit.revwalk.RevSort;
  60. import org.eclipse.jgit.transport.PackedObjectInfo;
  61. import org.eclipse.jgit.util.NB;
  62. /**
  63. * <p>
 * The PackWriter class is responsible for generating pack files from a
 * specified set of objects from a repository. This implementation produces
 * pack files in format version 2.
  67. * </p>
  68. * <p>
  69. * Source of objects may be specified in two ways:
  70. * <ul>
  71. * <li>(usually) by providing sets of interesting and uninteresting objects in
  72. * repository - all interesting objects and their ancestors except uninteresting
  73. * objects and their ancestors will be included in pack, or</li>
  74. * <li>by providing iterator of {@link RevObject} specifying exact list and
  75. * order of objects in pack</li>
  76. * </ul>
  77. * Typical usage consists of creating instance intended for some pack,
  78. * configuring options, preparing the list of objects by calling
  79. * {@link #preparePack(Iterator)} or
  80. * {@link #preparePack(Collection, Collection)}, and finally
  81. * producing the stream with {@link #writePack(OutputStream)}.
  82. * </p>
  83. * <p>
 * The class provides a set of configurable options and {@link ProgressMonitor}
 * support, as operations may take a long time for big repositories. The delta
 * search algorithm is <b>NOT IMPLEMENTED</b> yet - this implementation
 * relies only on reuse of existing deltas and objects.
  88. * </p>
  89. * <p>
 * This class is not thread safe; it is intended to be used in one thread, with
 * one instance per created pack. Subsequent calls to writePack result in
 * undefined behavior.
  93. * </p>
  94. */
  95. public class PackWriter {
  96. /**
  97. * Title of {@link ProgressMonitor} task used during counting objects to
  98. * pack.
  99. *
  100. * @see #preparePack(Collection, Collection)
  101. */
  102. public static final String COUNTING_OBJECTS_PROGRESS = "Counting objects";
  103. /**
  104. * Title of {@link ProgressMonitor} task used during searching for objects
  105. * reuse or delta reuse.
  106. *
  107. * @see #writePack(OutputStream)
  108. */
  109. public static final String SEARCHING_REUSE_PROGRESS = "Compressing objects";
  110. /**
  111. * Title of {@link ProgressMonitor} task used during writing out pack
  112. * (objects)
  113. *
  114. * @see #writePack(OutputStream)
  115. */
  116. public static final String WRITING_OBJECTS_PROGRESS = "Writing objects";
  117. /**
  118. * Default value of deltas reuse option.
  119. *
  120. * @see #setReuseDeltas(boolean)
  121. */
  122. public static final boolean DEFAULT_REUSE_DELTAS = true;
  123. /**
  124. * Default value of objects reuse option.
  125. *
  126. * @see #setReuseObjects(boolean)
  127. */
  128. public static final boolean DEFAULT_REUSE_OBJECTS = true;
  129. /**
  130. * Default value of delta base as offset option.
  131. *
  132. * @see #setDeltaBaseAsOffset(boolean)
  133. */
  134. public static final boolean DEFAULT_DELTA_BASE_AS_OFFSET = false;
  135. /**
  136. * Default value of maximum delta chain depth.
  137. *
  138. * @see #setMaxDeltaDepth(int)
  139. */
  140. public static final int DEFAULT_MAX_DELTA_DEPTH = 50;
  141. private static final int PACK_VERSION_GENERATED = 2;
  142. @SuppressWarnings("unchecked")
  143. private final List<ObjectToPack> objectsLists[] = new List[Constants.OBJ_TAG + 1];
  144. {
  145. objectsLists[0] = Collections.<ObjectToPack> emptyList();
  146. objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
  147. objectsLists[Constants.OBJ_TREE] = new ArrayList<ObjectToPack>();
  148. objectsLists[Constants.OBJ_BLOB] = new ArrayList<ObjectToPack>();
  149. objectsLists[Constants.OBJ_TAG] = new ArrayList<ObjectToPack>();
  150. }
  151. private final ObjectIdSubclassMap<ObjectToPack> objectsMap = new ObjectIdSubclassMap<ObjectToPack>();
  152. // edge objects for thin packs
  153. private final ObjectIdSubclassMap<ObjectId> edgeObjects = new ObjectIdSubclassMap<ObjectId>();
  154. private final Repository db;
  155. private PackOutputStream out;
  156. private final Deflater deflater;
  157. private ProgressMonitor initMonitor;
  158. private ProgressMonitor writeMonitor;
  159. private final byte[] buf = new byte[16384]; // 16 KB
  160. private final WindowCursor windowCursor = new WindowCursor();
  161. private List<ObjectToPack> sortedByName;
  162. private byte packcsum[];
  163. private boolean reuseDeltas = DEFAULT_REUSE_DELTAS;
  164. private boolean reuseObjects = DEFAULT_REUSE_OBJECTS;
  165. private boolean deltaBaseAsOffset = DEFAULT_DELTA_BASE_AS_OFFSET;
  166. private int maxDeltaDepth = DEFAULT_MAX_DELTA_DEPTH;
  167. private int outputVersion;
  168. private boolean thin;
  169. private boolean ignoreMissingUninteresting = true;
  /**
   * Create a writer for the given repository that reports all progress to
   * one monitor.
   * <p>
   * The objects to pack are supplied later through
   * {@link #preparePack(Iterator)} or
   * {@link #preparePack(Collection, Collection)}.
   *
   * @param repo
   *            repository where objects are stored.
   * @param monitor
   *            progress monitor used within
   *            {@link #preparePack(Iterator)},
   *            {@link #preparePack(Collection, Collection)} and
   *            {@link #writePack(OutputStream)}.
   */
  public PackWriter(final Repository repo, final ProgressMonitor monitor) {
      // One monitor plays both the init and the write role.
      this(repo, monitor, monitor);
  }
  /**
   * Create a writer for the given repository with separate monitors for
   * preparation and writing.
   * <p>
   * The objects to pack are supplied later through
   * {@link #preparePack(Iterator)} or
   * {@link #preparePack(Collection, Collection)}. A {@code null} monitor is
   * replaced by {@link NullProgressMonitor#INSTANCE}. The compression level
   * and the pack index version are read from the repository's core
   * configuration.
   *
   * @param repo
   *            repository where objects are stored.
   * @param imonitor
   *            progress monitor used within
   *            {@link #preparePack(Iterator)} and
   *            {@link #preparePack(Collection, Collection)}; may be null.
   * @param wmonitor
   *            progress monitor used within
   *            {@link #writePack(OutputStream)}; may be null.
   */
  public PackWriter(final Repository repo, final ProgressMonitor imonitor,
          final ProgressMonitor wmonitor) {
      this.db = repo;

      if (imonitor == null) {
          this.initMonitor = NullProgressMonitor.INSTANCE;
      } else {
          this.initMonitor = imonitor;
      }

      if (wmonitor == null) {
          this.writeMonitor = NullProgressMonitor.INSTANCE;
      } else {
          this.writeMonitor = wmonitor;
      }

      this.deflater = new Deflater(db.getConfig().getCore().getCompression());
      this.outputVersion = repo.getConfig().getCore().getPackIndexVersion();
  }
  211. /**
  212. * Check whether object is configured to reuse deltas existing in
  213. * repository.
  214. * <p>
  215. * Default setting: {@value #DEFAULT_REUSE_DELTAS}
  216. * </p>
  217. *
  218. * @return true if object is configured to reuse deltas; false otherwise.
  219. */
  220. public boolean isReuseDeltas() {
  221. return reuseDeltas;
  222. }
  223. /**
  224. * Set reuse deltas configuration option for this writer. When enabled,
  225. * writer will search for delta representation of object in repository and
  226. * use it if possible. Normally, only deltas with base to another object
  227. * existing in set of objects to pack will be used. Exception is however
  228. * thin-pack (see
  229. * {@link #preparePack(Collection, Collection)} and
  230. * {@link #preparePack(Iterator)}) where base object must exist on other
  231. * side machine.
  232. * <p>
  233. * When raw delta data is directly copied from a pack file, checksum is
  234. * computed to verify data.
  235. * </p>
  236. * <p>
  237. * Default setting: {@value #DEFAULT_REUSE_DELTAS}
  238. * </p>
  239. *
  240. * @param reuseDeltas
  241. * boolean indicating whether or not try to reuse deltas.
  242. */
  243. public void setReuseDeltas(boolean reuseDeltas) {
  244. this.reuseDeltas = reuseDeltas;
  245. }
  246. /**
  247. * Checks whether object is configured to reuse existing objects
  248. * representation in repository.
  249. * <p>
  250. * Default setting: {@value #DEFAULT_REUSE_OBJECTS}
  251. * </p>
  252. *
  253. * @return true if writer is configured to reuse objects representation from
  254. * pack; false otherwise.
  255. */
  256. public boolean isReuseObjects() {
  257. return reuseObjects;
  258. }
  259. /**
  260. * Set reuse objects configuration option for this writer. If enabled,
  261. * writer searches for representation in a pack file. If possible,
  262. * compressed data is directly copied from such a pack file. Data checksum
  263. * is verified.
  264. * <p>
  265. * Default setting: {@value #DEFAULT_REUSE_OBJECTS}
  266. * </p>
  267. *
  268. * @param reuseObjects
  269. * boolean indicating whether or not writer should reuse existing
  270. * objects representation.
  271. */
  272. public void setReuseObjects(boolean reuseObjects) {
  273. this.reuseObjects = reuseObjects;
  274. }
  275. /**
  276. * Check whether writer can store delta base as an offset (new style
  277. * reducing pack size) or should store it as an object id (legacy style,
  278. * compatible with old readers).
  279. * <p>
  280. * Default setting: {@value #DEFAULT_DELTA_BASE_AS_OFFSET}
  281. * </p>
  282. *
  283. * @return true if delta base is stored as an offset; false if it is stored
  284. * as an object id.
  285. */
  286. public boolean isDeltaBaseAsOffset() {
  287. return deltaBaseAsOffset;
  288. }
  289. /**
  290. * Set writer delta base format. Delta base can be written as an offset in a
  291. * pack file (new approach reducing file size) or as an object id (legacy
  292. * approach, compatible with old readers).
  293. * <p>
  294. * Default setting: {@value #DEFAULT_DELTA_BASE_AS_OFFSET}
  295. * </p>
  296. *
  297. * @param deltaBaseAsOffset
  298. * boolean indicating whether delta base can be stored as an
  299. * offset.
  300. */
  301. public void setDeltaBaseAsOffset(boolean deltaBaseAsOffset) {
  302. this.deltaBaseAsOffset = deltaBaseAsOffset;
  303. }
  304. /**
  305. * Get maximum depth of delta chain set up for this writer. Generated chains
  306. * are not longer than this value.
  307. * <p>
  308. * Default setting: {@value #DEFAULT_MAX_DELTA_DEPTH}
  309. * </p>
  310. *
  311. * @return maximum delta chain depth.
  312. */
  313. public int getMaxDeltaDepth() {
  314. return maxDeltaDepth;
  315. }
  316. /**
  317. * Set up maximum depth of delta chain for this writer. Generated chains are
  318. * not longer than this value. Too low value causes low compression level,
  319. * while too big makes unpacking (reading) longer.
  320. * <p>
  321. * Default setting: {@value #DEFAULT_MAX_DELTA_DEPTH}
  322. * </p>
  323. *
  324. * @param maxDeltaDepth
  325. * maximum delta chain depth.
  326. */
  327. public void setMaxDeltaDepth(int maxDeltaDepth) {
  328. this.maxDeltaDepth = maxDeltaDepth;
  329. }
  330. /** @return true if this writer is producing a thin pack. */
  331. public boolean isThin() {
  332. return thin;
  333. }
  334. /**
  335. * @param packthin
  336. * a boolean indicating whether writer may pack objects with
  337. * delta base object not within set of objects to pack, but
  338. * belonging to party repository (uninteresting/boundary) as
  339. * determined by set; this kind of pack is used only for
  340. * transport; true - to produce thin pack, false - otherwise.
  341. */
  342. public void setThin(final boolean packthin) {
  343. thin = packthin;
  344. }
  345. /**
  346. * @return true to ignore objects that are uninteresting and also not found
  347. * on local disk; false to throw a {@link MissingObjectException}
  348. * out of {@link #preparePack(Collection, Collection)} if an
  349. * uninteresting object is not in the source repository. By default,
  350. * true, permitting gracefully ignoring of uninteresting objects.
  351. */
  352. public boolean isIgnoreMissingUninteresting() {
  353. return ignoreMissingUninteresting;
  354. }
  355. /**
  356. * @param ignore
  357. * true if writer should ignore non existing uninteresting
  358. * objects during construction set of objects to pack; false
  359. * otherwise - non existing uninteresting objects may cause
  360. * {@link MissingObjectException}
  361. */
  362. public void setIgnoreMissingUninteresting(final boolean ignore) {
  363. ignoreMissingUninteresting = ignore;
  364. }
  365. /**
  366. * Set the pack index file format version this instance will create.
  367. *
  368. * @param version
  369. * the version to write. The special version 0 designates the
  370. * oldest (most compatible) format available for the objects.
  371. * @see PackIndexWriter
  372. */
  373. public void setIndexVersion(final int version) {
  374. outputVersion = version;
  375. }
  376. /**
  377. * Returns objects number in a pack file that was created by this writer.
  378. *
  379. * @return number of objects in pack.
  380. */
  381. public int getObjectsNumber() {
  382. return objectsMap.size();
  383. }
  384. /**
  385. * Prepare the list of objects to be written to the pack stream.
  386. * <p>
  387. * Iterator <b>exactly</b> determines which objects are included in a pack
  388. * and order they appear in pack (except that objects order by type is not
  389. * needed at input). This order should conform general rules of ordering
  390. * objects in git - by recency and path (type and delta-base first is
  391. * internally secured) and responsibility for guaranteeing this order is on
  392. * a caller side. Iterator must return each id of object to write exactly
  393. * once.
  394. * </p>
  395. * <p>
  396. * When iterator returns object that has {@link RevFlag#UNINTERESTING} flag,
  397. * this object won't be included in an output pack. Instead, it is recorded
  398. * as edge-object (known to remote repository) for thin-pack. In such a case
  399. * writer may pack objects with delta base object not within set of objects
  400. * to pack, but belonging to party repository - those marked with
  401. * {@link RevFlag#UNINTERESTING} flag. This type of pack is used only for
  402. * transport.
  403. * </p>
  404. *
  405. * @param objectsSource
  406. * iterator of object to store in a pack; order of objects within
  407. * each type is important, ordering by type is not needed;
  408. * allowed types for objects are {@link Constants#OBJ_COMMIT},
  409. * {@link Constants#OBJ_TREE}, {@link Constants#OBJ_BLOB} and
  410. * {@link Constants#OBJ_TAG}; objects returned by iterator may
  411. * be later reused by caller as object id and type are internally
  412. * copied in each iteration; if object returned by iterator has
  413. * {@link RevFlag#UNINTERESTING} flag set, it won't be included
  414. * in a pack, but is considered as edge-object for thin-pack.
  415. * @throws IOException
  416. * when some I/O problem occur during reading objects.
  417. */
  418. public void preparePack(final Iterator<RevObject> objectsSource)
  419. throws IOException {
  420. while (objectsSource.hasNext()) {
  421. addObject(objectsSource.next());
  422. }
  423. }
  424. /**
  425. * Prepare the list of objects to be written to the pack stream.
  426. * <p>
  427. * Basing on these 2 sets, another set of objects to put in a pack file is
  428. * created: this set consists of all objects reachable (ancestors) from
  429. * interesting objects, except uninteresting objects and their ancestors.
  430. * This method uses class {@link ObjectWalk} extensively to find out that
  431. * appropriate set of output objects and their optimal order in output pack.
  432. * Order is consistent with general git in-pack rules: sort by object type,
  433. * recency, path and delta-base first.
  434. * </p>
  435. *
  436. * @param interestingObjects
  437. * collection of objects to be marked as interesting (start
  438. * points of graph traversal).
  439. * @param uninterestingObjects
  440. * collection of objects to be marked as uninteresting (end
  441. * points of graph traversal).
  442. * @throws IOException
  443. * when some I/O problem occur during reading objects.
  444. */
  445. public void preparePack(
  446. final Collection<? extends ObjectId> interestingObjects,
  447. final Collection<? extends ObjectId> uninterestingObjects)
  448. throws IOException {
  449. ObjectWalk walker = setUpWalker(interestingObjects,
  450. uninterestingObjects);
  451. findObjectsToPack(walker);
  452. }
  453. /**
  454. * Determine if the pack file will contain the requested object.
  455. *
  456. * @param id
  457. * the object to test the existence of.
  458. * @return true if the object will appear in the output pack file.
  459. */
  460. public boolean willInclude(final AnyObjectId id) {
  461. return objectsMap.get(id) != null;
  462. }
  463. /**
  464. * Computes SHA-1 of lexicographically sorted objects ids written in this
  465. * pack, as used to name a pack file in repository.
  466. *
  467. * @return ObjectId representing SHA-1 name of a pack that was created.
  468. */
  469. public ObjectId computeName() {
  470. final MessageDigest md = Constants.newMessageDigest();
  471. for (ObjectToPack otp : sortByName()) {
  472. otp.copyRawTo(buf, 0);
  473. md.update(buf, 0, Constants.OBJECT_ID_LENGTH);
  474. }
  475. return ObjectId.fromRaw(md.digest());
  476. }
  477. /**
  478. * Create an index file to match the pack file just written.
  479. * <p>
  480. * This method can only be invoked after {@link #preparePack(Iterator)} or
  481. * {@link #preparePack(Collection, Collection)} has been
  482. * invoked and completed successfully. Writing a corresponding index is an
  483. * optional feature that not all pack users may require.
  484. *
  485. * @param indexStream
  486. * output for the index data. Caller is responsible for closing
  487. * this stream.
  488. * @throws IOException
  489. * the index data could not be written to the supplied stream.
  490. */
  491. public void writeIndex(final OutputStream indexStream) throws IOException {
  492. final List<ObjectToPack> list = sortByName();
  493. final PackIndexWriter iw;
  494. if (outputVersion <= 0)
  495. iw = PackIndexWriter.createOldestPossible(indexStream, list);
  496. else
  497. iw = PackIndexWriter.createVersion(indexStream, outputVersion);
  498. iw.write(list, packcsum);
  499. }
  500. private List<ObjectToPack> sortByName() {
  501. if (sortedByName == null) {
  502. sortedByName = new ArrayList<ObjectToPack>(objectsMap.size());
  503. for (List<ObjectToPack> list : objectsLists) {
  504. for (ObjectToPack otp : list)
  505. sortedByName.add(otp);
  506. }
  507. Collections.sort(sortedByName);
  508. }
  509. return sortedByName;
  510. }
  511. /**
  512. * Write the prepared pack to the supplied stream.
  513. * <p>
  514. * At first, this method collects and sorts objects to pack, then deltas
  515. * search is performed if set up accordingly, finally pack stream is
  516. * written. {@link ProgressMonitor} tasks {@value #SEARCHING_REUSE_PROGRESS}
  517. * (only if reuseDeltas or reuseObjects is enabled) and
  518. * {@value #WRITING_OBJECTS_PROGRESS} are updated during packing.
  519. * </p>
  520. * <p>
  521. * All reused objects data checksum (Adler32/CRC32) is computed and
  522. * validated against existing checksum.
  523. * </p>
  524. *
  525. * @param packStream
  526. * output stream of pack data. The stream should be buffered by
  527. * the caller. The caller is responsible for closing the stream.
  528. * @throws IOException
  529. * an error occurred reading a local object's data to include in
  530. * the pack, or writing compressed object data to the output
  531. * stream.
  532. */
  533. public void writePack(OutputStream packStream) throws IOException {
  534. if (reuseDeltas || reuseObjects)
  535. searchForReuse();
  536. out = new PackOutputStream(packStream);
  537. writeMonitor.beginTask(WRITING_OBJECTS_PROGRESS, getObjectsNumber());
  538. writeHeader();
  539. writeObjects();
  540. writeChecksum();
  541. windowCursor.release();
  542. writeMonitor.endTask();
  543. }
  544. private void searchForReuse() throws IOException {
  545. initMonitor.beginTask(SEARCHING_REUSE_PROGRESS, getObjectsNumber());
  546. final Collection<PackedObjectLoader> reuseLoaders = new ArrayList<PackedObjectLoader>();
  547. for (List<ObjectToPack> list : objectsLists) {
  548. for (ObjectToPack otp : list) {
  549. if (initMonitor.isCancelled())
  550. throw new IOException(
  551. "Packing cancelled during objects writing");
  552. reuseLoaders.clear();
  553. searchForReuse(reuseLoaders, otp);
  554. initMonitor.update(1);
  555. }
  556. }
  557. initMonitor.endTask();
  558. }
  559. private void searchForReuse(
  560. final Collection<PackedObjectLoader> reuseLoaders,
  561. final ObjectToPack otp) throws IOException {
  562. db.openObjectInAllPacks(otp, reuseLoaders, windowCursor);
  563. if (reuseDeltas) {
  564. selectDeltaReuseForObject(otp, reuseLoaders);
  565. }
  566. // delta reuse is preferred over object reuse
  567. if (reuseObjects && !otp.hasReuseLoader()) {
  568. selectObjectReuseForObject(otp, reuseLoaders);
  569. }
  570. }
  571. private void selectDeltaReuseForObject(final ObjectToPack otp,
  572. final Collection<PackedObjectLoader> loaders) throws IOException {
  573. PackedObjectLoader bestLoader = null;
  574. ObjectId bestBase = null;
  575. for (PackedObjectLoader loader : loaders) {
  576. ObjectId idBase = loader.getDeltaBase();
  577. if (idBase == null)
  578. continue;
  579. ObjectToPack otpBase = objectsMap.get(idBase);
  580. // only if base is in set of objects to write or thin-pack's edge
  581. if ((otpBase != null || (thin && edgeObjects.get(idBase) != null))
  582. // select smallest possible delta if > 1 available
  583. && isBetterDeltaReuseLoader(bestLoader, loader)) {
  584. bestLoader = loader;
  585. bestBase = (otpBase != null ? otpBase : idBase);
  586. }
  587. }
  588. if (bestLoader != null) {
  589. otp.setReuseLoader(bestLoader);
  590. otp.setDeltaBase(bestBase);
  591. }
  592. }
  593. private static boolean isBetterDeltaReuseLoader(
  594. PackedObjectLoader currentLoader, PackedObjectLoader loader)
  595. throws IOException {
  596. if (currentLoader == null)
  597. return true;
  598. if (loader.getRawSize() < currentLoader.getRawSize())
  599. return true;
  600. return (loader.getRawSize() == currentLoader.getRawSize()
  601. && loader.supportsFastCopyRawData() && !currentLoader
  602. .supportsFastCopyRawData());
  603. }
  604. private void selectObjectReuseForObject(final ObjectToPack otp,
  605. final Collection<PackedObjectLoader> loaders) {
  606. for (final PackedObjectLoader loader : loaders) {
  607. if (loader instanceof WholePackedObjectLoader) {
  608. otp.setReuseLoader(loader);
  609. return;
  610. }
  611. }
  612. }
  613. private void writeHeader() throws IOException {
  614. System.arraycopy(Constants.PACK_SIGNATURE, 0, buf, 0, 4);
  615. NB.encodeInt32(buf, 4, PACK_VERSION_GENERATED);
  616. NB.encodeInt32(buf, 8, getObjectsNumber());
  617. out.write(buf, 0, 12);
  618. }
  619. private void writeObjects() throws IOException {
  620. for (List<ObjectToPack> list : objectsLists) {
  621. for (ObjectToPack otp : list) {
  622. if (writeMonitor.isCancelled())
  623. throw new IOException(
  624. "Packing cancelled during objects writing");
  625. if (!otp.isWritten())
  626. writeObject(otp);
  627. }
  628. }
  629. }
  630. private void writeObject(final ObjectToPack otp) throws IOException {
  631. otp.markWantWrite();
  632. if (otp.isDeltaRepresentation()) {
  633. ObjectToPack deltaBase = otp.getDeltaBase();
  634. assert deltaBase != null || thin;
  635. if (deltaBase != null && !deltaBase.isWritten()) {
  636. if (deltaBase.wantWrite()) {
  637. otp.clearDeltaBase(); // cycle detected
  638. otp.disposeLoader();
  639. } else {
  640. writeObject(deltaBase);
  641. }
  642. }
  643. }
  644. assert !otp.isWritten();
  645. out.resetCRC32();
  646. otp.setOffset(out.length());
  647. final PackedObjectLoader reuse = open(otp);
  648. if (reuse != null) {
  649. try {
  650. if (otp.isDeltaRepresentation())
  651. writeDeltaObjectHeader(otp, reuse);
  652. else
  653. writeObjectHeader(otp.getType(), reuse.getSize());
  654. reuse.copyRawData(out, buf, windowCursor);
  655. } finally {
  656. reuse.endCopyRawData();
  657. }
  658. } else if (otp.isDeltaRepresentation()) {
  659. throw new IOException("creating deltas is not implemented");
  660. } else {
  661. writeWholeObjectDeflate(otp);
  662. }
  663. otp.setCRC(out.getCRC32());
  664. writeMonitor.update(1);
  665. }
  666. private PackedObjectLoader open(final ObjectToPack otp) throws IOException {
  667. for (;;) {
  668. PackedObjectLoader reuse = otp.useLoader();
  669. if (reuse == null) {
  670. return null;
  671. }
  672. try {
  673. reuse.beginCopyRawData();
  674. return reuse;
  675. } catch (IOException err) {
  676. // The pack we found the object in originally is gone, or
  677. // it has been overwritten with a different layout.
  678. //
  679. otp.clearDeltaBase();
  680. searchForReuse(new ArrayList<PackedObjectLoader>(), otp);
  681. continue;
  682. }
  683. }
  684. }
  685. private void writeWholeObjectDeflate(final ObjectToPack otp)
  686. throws IOException {
  687. final ObjectLoader loader = db.openObject(windowCursor, otp);
  688. final byte[] data = loader.getCachedBytes();
  689. writeObjectHeader(otp.getType(), data.length);
  690. deflater.reset();
  691. deflater.setInput(data, 0, data.length);
  692. deflater.finish();
  693. do {
  694. final int n = deflater.deflate(buf, 0, buf.length);
  695. if (n > 0)
  696. out.write(buf, 0, n);
  697. } while (!deflater.finished());
  698. }
  699. private void writeDeltaObjectHeader(final ObjectToPack otp,
  700. final PackedObjectLoader reuse) throws IOException {
  701. if (deltaBaseAsOffset && otp.getDeltaBase() != null) {
  702. writeObjectHeader(Constants.OBJ_OFS_DELTA, reuse.getRawSize());
  703. final ObjectToPack deltaBase = otp.getDeltaBase();
  704. long offsetDiff = otp.getOffset() - deltaBase.getOffset();
  705. int pos = buf.length - 1;
  706. buf[pos] = (byte) (offsetDiff & 0x7F);
  707. while ((offsetDiff >>= 7) > 0) {
  708. buf[--pos] = (byte) (0x80 | (--offsetDiff & 0x7F));
  709. }
  710. out.write(buf, pos, buf.length - pos);
  711. } else {
  712. writeObjectHeader(Constants.OBJ_REF_DELTA, reuse.getRawSize());
  713. otp.getDeltaBaseId().copyRawTo(buf, 0);
  714. out.write(buf, 0, Constants.OBJECT_ID_LENGTH);
  715. }
  716. }
  717. private void writeObjectHeader(final int objectType, long dataLength)
  718. throws IOException {
  719. long nextLength = dataLength >>> 4;
  720. int size = 0;
  721. buf[size++] = (byte) ((nextLength > 0 ? 0x80 : 0x00)
  722. | (objectType << 4) | (dataLength & 0x0F));
  723. dataLength = nextLength;
  724. while (dataLength > 0) {
  725. nextLength >>>= 7;
  726. buf[size++] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (dataLength & 0x7F));
  727. dataLength = nextLength;
  728. }
  729. out.write(buf, 0, size);
  730. }
  731. private void writeChecksum() throws IOException {
  732. packcsum = out.getDigest();
  733. out.write(packcsum);
  734. }
  735. private ObjectWalk setUpWalker(
  736. final Collection<? extends ObjectId> interestingObjects,
  737. final Collection<? extends ObjectId> uninterestingObjects)
  738. throws MissingObjectException, IOException,
  739. IncorrectObjectTypeException {
  740. final ObjectWalk walker = new ObjectWalk(db);
  741. walker.setRetainBody(false);
  742. walker.sort(RevSort.TOPO);
  743. walker.sort(RevSort.COMMIT_TIME_DESC, true);
  744. if (thin)
  745. walker.sort(RevSort.BOUNDARY, true);
  746. for (ObjectId id : interestingObjects) {
  747. RevObject o = walker.parseAny(id);
  748. walker.markStart(o);
  749. }
  750. if (uninterestingObjects != null) {
  751. for (ObjectId id : uninterestingObjects) {
  752. final RevObject o;
  753. try {
  754. o = walker.parseAny(id);
  755. } catch (MissingObjectException x) {
  756. if (ignoreMissingUninteresting)
  757. continue;
  758. throw x;
  759. }
  760. walker.markUninteresting(o);
  761. }
  762. }
  763. return walker;
  764. }
  765. private void findObjectsToPack(final ObjectWalk walker)
  766. throws MissingObjectException, IncorrectObjectTypeException,
  767. IOException {
  768. initMonitor.beginTask(COUNTING_OBJECTS_PROGRESS,
  769. ProgressMonitor.UNKNOWN);
  770. RevObject o;
  771. while ((o = walker.next()) != null) {
  772. addObject(o);
  773. initMonitor.update(1);
  774. }
  775. while ((o = walker.nextObject()) != null) {
  776. addObject(o);
  777. initMonitor.update(1);
  778. }
  779. initMonitor.endTask();
  780. }
  781. /**
  782. * Include one object to the output file.
  783. * <p>
  784. * Objects are written in the order they are added. If the same object is
  785. * added twice, it may be written twice, creating a larger than necessary
  786. * file.
  787. *
  788. * @param object
  789. * the object to add.
  790. * @throws IncorrectObjectTypeException
  791. * the object is an unsupported type.
  792. */
  793. public void addObject(final RevObject object)
  794. throws IncorrectObjectTypeException {
  795. if (object.has(RevFlag.UNINTERESTING)) {
  796. edgeObjects.add(object);
  797. thin = true;
  798. return;
  799. }
  800. final ObjectToPack otp = new ObjectToPack(object, object.getType());
  801. try {
  802. objectsLists[object.getType()].add(otp);
  803. } catch (ArrayIndexOutOfBoundsException x) {
  804. throw new IncorrectObjectTypeException(object,
  805. "COMMIT nor TREE nor BLOB nor TAG");
  806. } catch (UnsupportedOperationException x) {
  807. // index pointing to "dummy" empty list
  808. throw new IncorrectObjectTypeException(object,
  809. "COMMIT nor TREE nor BLOB nor TAG");
  810. }
  811. objectsMap.add(otp);
  812. }
  813. /**
  814. * Class holding information about object that is going to be packed by
  815. * {@link PackWriter}. Information include object representation in a
  816. * pack-file and object status.
  817. *
  818. */
  819. static class ObjectToPack extends PackedObjectInfo {
  820. private ObjectId deltaBase;
  821. private PackedObjectLoader reuseLoader;
  822. /**
  823. * Bit field, from bit 0 to bit 31:
  824. * <ul>
  825. * <li>1 bit: wantWrite</li>
  826. * <li>3 bits: type</li>
  827. * <li>28 bits: deltaDepth</li>
  828. * </ul>
  829. */
  830. private int flags;
  831. /**
  832. * Construct object for specified object id. <br/> By default object is
  833. * marked as not written and non-delta packed (as a whole object).
  834. *
  835. * @param src
  836. * object id of object for packing
  837. * @param type
  838. * real type code of the object, not its in-pack type.
  839. */
  840. ObjectToPack(AnyObjectId src, final int type) {
  841. super(src);
  842. flags |= type << 1;
  843. }
  844. /**
  845. * @return delta base object id if object is going to be packed in delta
  846. * representation; null otherwise - if going to be packed as a
  847. * whole object.
  848. */
  849. ObjectId getDeltaBaseId() {
  850. return deltaBase;
  851. }
  852. /**
  853. * @return delta base object to pack if object is going to be packed in
  854. * delta representation and delta is specified as object to
  855. * pack; null otherwise - if going to be packed as a whole
  856. * object or delta base is specified only as id.
  857. */
  858. ObjectToPack getDeltaBase() {
  859. if (deltaBase instanceof ObjectToPack)
  860. return (ObjectToPack) deltaBase;
  861. return null;
  862. }
  863. /**
  864. * Set delta base for the object. Delta base set by this method is used
  865. * by {@link PackWriter} to write object - determines its representation
  866. * in a created pack.
  867. *
  868. * @param deltaBase
  869. * delta base object or null if object should be packed as a
  870. * whole object.
  871. *
  872. */
  873. void setDeltaBase(ObjectId deltaBase) {
  874. this.deltaBase = deltaBase;
  875. }
  876. void clearDeltaBase() {
  877. this.deltaBase = null;
  878. }
  879. /**
  880. * @return true if object is going to be written as delta; false
  881. * otherwise.
  882. */
  883. boolean isDeltaRepresentation() {
  884. return deltaBase != null;
  885. }
  886. /**
  887. * Check if object is already written in a pack. This information is
  888. * used to achieve delta-base precedence in a pack file.
  889. *
  890. * @return true if object is already written; false otherwise.
  891. */
  892. boolean isWritten() {
  893. return getOffset() != 0;
  894. }
  895. PackedObjectLoader useLoader() {
  896. final PackedObjectLoader r = reuseLoader;
  897. reuseLoader = null;
  898. return r;
  899. }
  900. boolean hasReuseLoader() {
  901. return reuseLoader != null;
  902. }
  903. void setReuseLoader(PackedObjectLoader reuseLoader) {
  904. this.reuseLoader = reuseLoader;
  905. }
  906. void disposeLoader() {
  907. this.reuseLoader = null;
  908. }
  909. int getType() {
  910. return (flags>>1) & 0x7;
  911. }
  912. int getDeltaDepth() {
  913. return flags >>> 4;
  914. }
  915. void updateDeltaDepth() {
  916. final int d;
  917. if (deltaBase instanceof ObjectToPack)
  918. d = ((ObjectToPack) deltaBase).getDeltaDepth() + 1;
  919. else if (deltaBase != null)
  920. d = 1;
  921. else
  922. d = 0;
  923. flags = (d << 4) | flags & 0x15;
  924. }
  925. boolean wantWrite() {
  926. return (flags & 1) == 1;
  927. }
  928. void markWantWrite() {
  929. flags |= 1;
  930. }
  931. }
  932. }