You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

StageBuilder.java 9.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. /*
  2. * Copyright (C) 2016, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.internal.ketch;
  44. import static org.eclipse.jgit.lib.FileMode.TYPE_GITLINK;
  45. import java.io.IOException;
  46. import java.util.ArrayList;
  47. import java.util.HashSet;
  48. import java.util.List;
  49. import java.util.Set;
  50. import org.eclipse.jgit.annotations.Nullable;
  51. import org.eclipse.jgit.lib.AnyObjectId;
  52. import org.eclipse.jgit.lib.CommitBuilder;
  53. import org.eclipse.jgit.lib.ObjectId;
  54. import org.eclipse.jgit.lib.ObjectInserter;
  55. import org.eclipse.jgit.lib.PersonIdent;
  56. import org.eclipse.jgit.lib.Repository;
  57. import org.eclipse.jgit.revwalk.RevCommit;
  58. import org.eclipse.jgit.revwalk.RevObject;
  59. import org.eclipse.jgit.revwalk.RevWalk;
  60. import org.eclipse.jgit.transport.ReceiveCommand;
  61. import org.eclipse.jgit.treewalk.EmptyTreeIterator;
  62. import org.eclipse.jgit.treewalk.TreeWalk;
  63. import org.eclipse.jgit.treewalk.filter.TreeFilter;
  64. /** Constructs a set of commands to stage content during a proposal. */
  65. public class StageBuilder {
  66. /**
  67. * Acceptable number of references to send in a single stage transaction.
  68. * <p>
  69. * If the number of unique objects exceeds this amount the builder will
  70. * attempt to decrease the reference count by chaining commits..
  71. */
  72. private static final int SMALL_BATCH_SIZE = 5;
  73. /**
  74. * Acceptable number of commits to chain together using parent pointers.
  75. * <p>
  76. * When staging many unique commits the {@link StageBuilder} batches
  77. * together unrelated commits as parents of a temporary commit. After the
  78. * proposal completes the temporary commit is discarded and can be garbage
  79. * collected by all replicas.
  80. */
  81. private static final int TEMP_PARENT_BATCH_SIZE = 128;
  82. private static final byte[] PEEL = { ' ', '^' };
  83. private final String txnStage;
  84. private final String txnId;
  85. /**
  86. * Construct a stage builder for a transaction.
  87. *
  88. * @param txnStageNamespace
  89. * namespace for transaction references to build
  90. * {@code "txnStageNamespace/txnId.n"} style names.
  91. * @param txnId
  92. * identifier used to name temporary staging refs.
  93. */
  94. public StageBuilder(String txnStageNamespace, ObjectId txnId) {
  95. this.txnStage = txnStageNamespace;
  96. this.txnId = txnId.name();
  97. }
  98. /**
  99. * Compare two RefTrees and return commands to stage new objects.
  100. * <p>
  101. * This method ignores the lineage between the two RefTrees and does a
  102. * straight diff on the two trees. New objects will be staged. The diff
  103. * strategy is useful to catch-up a lagging replica, without sending every
  104. * intermediate step. This may mean the replica does not have the same
  105. * object set as other replicas if there are rewinds or branch deletes.
  106. *
  107. * @param git
  108. * source repository to read {@code oldTree} and {@code newTree}
  109. * from.
  110. * @param oldTree
  111. * accepted RefTree on the replica ({@code refs/txn/accepted}).
  112. * Use {@link ObjectId#zeroId()} if the remote does not have any
  113. * ref tree, e.g. a new replica catching up.
  114. * @param newTree
  115. * RefTree being sent to the replica. The trees will be compared.
  116. * @return list of commands to create {@code "refs/txn/stage/..."}
  117. * references on replicas anchoring new objects into the repository
  118. * while a transaction gains consensus.
  119. * @throws IOException
  120. * {@code git} cannot be accessed to compare {@code oldTree} and
  121. * {@code newTree} to build the object set.
  122. */
  123. public List<ReceiveCommand> makeStageList(Repository git, ObjectId oldTree,
  124. ObjectId newTree) throws IOException {
  125. try (RevWalk rw = new RevWalk(git);
  126. TreeWalk tw = new TreeWalk(rw.getObjectReader());
  127. ObjectInserter ins = git.newObjectInserter()) {
  128. if (AnyObjectId.equals(oldTree, ObjectId.zeroId())) {
  129. tw.addTree(new EmptyTreeIterator());
  130. } else {
  131. tw.addTree(rw.parseTree(oldTree));
  132. }
  133. tw.addTree(rw.parseTree(newTree));
  134. tw.setFilter(TreeFilter.ANY_DIFF);
  135. tw.setRecursive(true);
  136. Set<ObjectId> newObjs = new HashSet<>();
  137. while (tw.next()) {
  138. if (tw.getRawMode(1) == TYPE_GITLINK
  139. && !tw.isPathSuffix(PEEL, 2)) {
  140. newObjs.add(tw.getObjectId(1));
  141. }
  142. }
  143. List<ReceiveCommand> cmds = makeStageList(newObjs, git, ins);
  144. ins.flush();
  145. return cmds;
  146. }
  147. }
  148. /**
  149. * Construct a set of commands to stage objects on a replica.
  150. *
  151. * @param newObjs
  152. * objects to send to a replica.
  153. * @param git
  154. * local repository to read source objects from. Required to
  155. * perform minification of {@code newObjs}.
  156. * @param inserter
  157. * inserter to write temporary commit objects during minification
  158. * if many new branches are created by {@code newObjs}.
  159. * @return list of commands to create {@code "refs/txn/stage/..."}
  160. * references on replicas anchoring {@code newObjs} into the
  161. * repository while a transaction gains consensus.
  162. * @throws IOException
  163. * {@code git} cannot be accessed to perform minification of
  164. * {@code newObjs}.
  165. */
  166. public List<ReceiveCommand> makeStageList(Set<ObjectId> newObjs,
  167. @Nullable Repository git, @Nullable ObjectInserter inserter)
  168. throws IOException {
  169. if (git == null || newObjs.size() <= SMALL_BATCH_SIZE) {
  170. // Without a source repository can only construct unique set.
  171. List<ReceiveCommand> cmds = new ArrayList<>(newObjs.size());
  172. for (ObjectId id : newObjs) {
  173. stage(cmds, id);
  174. }
  175. return cmds;
  176. }
  177. List<ReceiveCommand> cmds = new ArrayList<>();
  178. List<RevCommit> commits = new ArrayList<>();
  179. reduceObjects(cmds, commits, git, newObjs);
  180. if (inserter == null || commits.size() <= 1
  181. || (cmds.size() + commits.size()) <= SMALL_BATCH_SIZE) {
  182. // Without an inserter to aggregate commits, or for a small set of
  183. // commits just send one stage ref per commit.
  184. for (RevCommit c : commits) {
  185. stage(cmds, c.copy());
  186. }
  187. return cmds;
  188. }
  189. // 'commits' is sorted most recent to least recent commit.
  190. // Group batches of commits and build a chain.
  191. // TODO(sop) Cluster by restricted graphs to support filtering.
  192. ObjectId tip = null;
  193. for (int end = commits.size(); end > 0;) {
  194. int start = Math.max(0, end - TEMP_PARENT_BATCH_SIZE);
  195. List<RevCommit> batch = commits.subList(start, end);
  196. List<ObjectId> parents = new ArrayList<>(1 + batch.size());
  197. if (tip != null) {
  198. parents.add(tip);
  199. }
  200. parents.addAll(batch);
  201. CommitBuilder b = new CommitBuilder();
  202. b.setTreeId(batch.get(0).getTree());
  203. b.setParentIds(parents);
  204. b.setAuthor(tmpAuthor(batch));
  205. b.setCommitter(b.getAuthor());
  206. tip = inserter.insert(b);
  207. end = start;
  208. }
  209. stage(cmds, tip);
  210. return cmds;
  211. }
  212. private static PersonIdent tmpAuthor(List<RevCommit> commits) {
  213. // Construct a predictable author using most recent commit time.
  214. int t = 0;
  215. for (int i = 0; i < commits.size();) {
  216. t = Math.max(t, commits.get(i).getCommitTime());
  217. }
  218. String name = "Ketch Stage"; //$NON-NLS-1$
  219. String email = "tmp@tmp"; //$NON-NLS-1$
  220. return new PersonIdent(name, email, t * 1000L, 0);
  221. }
  222. private void reduceObjects(List<ReceiveCommand> cmds,
  223. List<RevCommit> commits, Repository git,
  224. Set<ObjectId> newObjs) throws IOException {
  225. try (RevWalk rw = new RevWalk(git)) {
  226. rw.setRetainBody(false);
  227. for (ObjectId id : newObjs) {
  228. RevObject obj = rw.parseAny(id);
  229. if (obj instanceof RevCommit) {
  230. rw.markStart((RevCommit) obj);
  231. } else {
  232. stage(cmds, id);
  233. }
  234. }
  235. for (RevCommit c; (c = rw.next()) != null;) {
  236. commits.add(c);
  237. rw.markUninteresting(c);
  238. }
  239. }
  240. }
  241. private void stage(List<ReceiveCommand> cmds, ObjectId id) {
  242. int estLen = txnStage.length() + txnId.length() + 5;
  243. StringBuilder n = new StringBuilder(estLen);
  244. n.append(txnStage).append(txnId).append('.');
  245. n.append(Integer.toHexString(cmds.size()));
  246. cmds.add(new ReceiveCommand(ObjectId.zeroId(), id, n.toString()));
  247. }
  248. }