You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

StageBuilder.java 9.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. /*
  2. * Copyright (C) 2016, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.internal.ketch;
  44. import static org.eclipse.jgit.lib.FileMode.TYPE_GITLINK;
  45. import java.io.IOException;
  46. import java.util.ArrayList;
  47. import java.util.HashSet;
  48. import java.util.List;
  49. import java.util.Set;
  50. import org.eclipse.jgit.annotations.Nullable;
  51. import org.eclipse.jgit.lib.AnyObjectId;
  52. import org.eclipse.jgit.lib.CommitBuilder;
  53. import org.eclipse.jgit.lib.ObjectId;
  54. import org.eclipse.jgit.lib.ObjectInserter;
  55. import org.eclipse.jgit.lib.PersonIdent;
  56. import org.eclipse.jgit.lib.Repository;
  57. import org.eclipse.jgit.revwalk.RevCommit;
  58. import org.eclipse.jgit.revwalk.RevObject;
  59. import org.eclipse.jgit.revwalk.RevWalk;
  60. import org.eclipse.jgit.transport.ReceiveCommand;
  61. import org.eclipse.jgit.treewalk.EmptyTreeIterator;
  62. import org.eclipse.jgit.treewalk.TreeWalk;
  63. import org.eclipse.jgit.treewalk.filter.TreeFilter;
  64. /**
  65. * Constructs a set of commands to stage content during a proposal.
  66. */
  67. public class StageBuilder {
  68. /**
  69. * Acceptable number of references to send in a single stage transaction.
  70. * <p>
  71. * If the number of unique objects exceeds this amount the builder will
  72. * attempt to decrease the reference count by chaining commits..
  73. */
  74. private static final int SMALL_BATCH_SIZE = 5;
  75. /**
  76. * Acceptable number of commits to chain together using parent pointers.
  77. * <p>
  78. * When staging many unique commits the {@link StageBuilder} batches
  79. * together unrelated commits as parents of a temporary commit. After the
  80. * proposal completes the temporary commit is discarded and can be garbage
  81. * collected by all replicas.
  82. */
  83. private static final int TEMP_PARENT_BATCH_SIZE = 128;
  84. private static final byte[] PEEL = { ' ', '^' };
  85. private final String txnStage;
  86. private final String txnId;
  87. /**
  88. * Construct a stage builder for a transaction.
  89. *
  90. * @param txnStageNamespace
  91. * namespace for transaction references to build
  92. * {@code "txnStageNamespace/txnId.n"} style names.
  93. * @param txnId
  94. * identifier used to name temporary staging refs.
  95. */
  96. public StageBuilder(String txnStageNamespace, ObjectId txnId) {
  97. this.txnStage = txnStageNamespace;
  98. this.txnId = txnId.name();
  99. }
  100. /**
  101. * Compare two RefTrees and return commands to stage new objects.
  102. * <p>
  103. * This method ignores the lineage between the two RefTrees and does a
  104. * straight diff on the two trees. New objects will be staged. The diff
  105. * strategy is useful to catch-up a lagging replica, without sending every
  106. * intermediate step. This may mean the replica does not have the same
  107. * object set as other replicas if there are rewinds or branch deletes.
  108. *
  109. * @param git
  110. * source repository to read {@code oldTree} and {@code newTree}
  111. * from.
  112. * @param oldTree
  113. * accepted RefTree on the replica ({@code refs/txn/accepted}).
  114. * Use {@link org.eclipse.jgit.lib.ObjectId#zeroId()} if the
  115. * remote does not have any ref tree, e.g. a new replica catching
  116. * up.
  117. * @param newTree
  118. * RefTree being sent to the replica. The trees will be compared.
  119. * @return list of commands to create {@code "refs/txn/stage/..."}
  120. * references on replicas anchoring new objects into the repository
  121. * while a transaction gains consensus.
  122. * @throws java.io.IOException
  123. * {@code git} cannot be accessed to compare {@code oldTree} and
  124. * {@code newTree} to build the object set.
  125. */
  126. public List<ReceiveCommand> makeStageList(Repository git, ObjectId oldTree,
  127. ObjectId newTree) throws IOException {
  128. try (RevWalk rw = new RevWalk(git);
  129. TreeWalk tw = new TreeWalk(rw.getObjectReader());
  130. ObjectInserter ins = git.newObjectInserter()) {
  131. if (AnyObjectId.isEqual(oldTree, ObjectId.zeroId())) {
  132. tw.addTree(new EmptyTreeIterator());
  133. } else {
  134. tw.addTree(rw.parseTree(oldTree));
  135. }
  136. tw.addTree(rw.parseTree(newTree));
  137. tw.setFilter(TreeFilter.ANY_DIFF);
  138. tw.setRecursive(true);
  139. Set<ObjectId> newObjs = new HashSet<>();
  140. while (tw.next()) {
  141. if (tw.getRawMode(1) == TYPE_GITLINK
  142. && !tw.isPathSuffix(PEEL, 2)) {
  143. newObjs.add(tw.getObjectId(1));
  144. }
  145. }
  146. List<ReceiveCommand> cmds = makeStageList(newObjs, git, ins);
  147. ins.flush();
  148. return cmds;
  149. }
  150. }
  151. /**
  152. * Construct a set of commands to stage objects on a replica.
  153. *
  154. * @param newObjs
  155. * objects to send to a replica.
  156. * @param git
  157. * local repository to read source objects from. Required to
  158. * perform minification of {@code newObjs}.
  159. * @param inserter
  160. * inserter to write temporary commit objects during minification
  161. * if many new branches are created by {@code newObjs}.
  162. * @return list of commands to create {@code "refs/txn/stage/..."}
  163. * references on replicas anchoring {@code newObjs} into the
  164. * repository while a transaction gains consensus.
  165. * @throws java.io.IOException
  166. * {@code git} cannot be accessed to perform minification of
  167. * {@code newObjs}.
  168. */
  169. public List<ReceiveCommand> makeStageList(Set<ObjectId> newObjs,
  170. @Nullable Repository git, @Nullable ObjectInserter inserter)
  171. throws IOException {
  172. if (git == null || newObjs.size() <= SMALL_BATCH_SIZE) {
  173. // Without a source repository can only construct unique set.
  174. List<ReceiveCommand> cmds = new ArrayList<>(newObjs.size());
  175. for (ObjectId id : newObjs) {
  176. stage(cmds, id);
  177. }
  178. return cmds;
  179. }
  180. List<ReceiveCommand> cmds = new ArrayList<>();
  181. List<RevCommit> commits = new ArrayList<>();
  182. reduceObjects(cmds, commits, git, newObjs);
  183. if (inserter == null || commits.size() <= 1
  184. || (cmds.size() + commits.size()) <= SMALL_BATCH_SIZE) {
  185. // Without an inserter to aggregate commits, or for a small set of
  186. // commits just send one stage ref per commit.
  187. for (RevCommit c : commits) {
  188. stage(cmds, c.copy());
  189. }
  190. return cmds;
  191. }
  192. // 'commits' is sorted most recent to least recent commit.
  193. // Group batches of commits and build a chain.
  194. // TODO(sop) Cluster by restricted graphs to support filtering.
  195. ObjectId tip = null;
  196. for (int end = commits.size(); end > 0;) {
  197. int start = Math.max(0, end - TEMP_PARENT_BATCH_SIZE);
  198. List<RevCommit> batch = commits.subList(start, end);
  199. List<ObjectId> parents = new ArrayList<>(1 + batch.size());
  200. if (tip != null) {
  201. parents.add(tip);
  202. }
  203. parents.addAll(batch);
  204. CommitBuilder b = new CommitBuilder();
  205. b.setTreeId(batch.get(0).getTree());
  206. b.setParentIds(parents);
  207. b.setAuthor(tmpAuthor(batch));
  208. b.setCommitter(b.getAuthor());
  209. tip = inserter.insert(b);
  210. end = start;
  211. }
  212. stage(cmds, tip);
  213. return cmds;
  214. }
  215. private static PersonIdent tmpAuthor(List<RevCommit> commits) {
  216. // Construct a predictable author using most recent commit time.
  217. int t = 0;
  218. for (int i = 0; i < commits.size();) {
  219. t = Math.max(t, commits.get(i).getCommitTime());
  220. }
  221. String name = "Ketch Stage"; //$NON-NLS-1$
  222. String email = "tmp@tmp"; //$NON-NLS-1$
  223. return new PersonIdent(name, email, t * 1000L, 0);
  224. }
  225. private void reduceObjects(List<ReceiveCommand> cmds,
  226. List<RevCommit> commits, Repository git,
  227. Set<ObjectId> newObjs) throws IOException {
  228. try (RevWalk rw = new RevWalk(git)) {
  229. rw.setRetainBody(false);
  230. for (ObjectId id : newObjs) {
  231. RevObject obj = rw.parseAny(id);
  232. if (obj instanceof RevCommit) {
  233. rw.markStart((RevCommit) obj);
  234. } else {
  235. stage(cmds, id);
  236. }
  237. }
  238. for (RevCommit c; (c = rw.next()) != null;) {
  239. commits.add(c);
  240. rw.markUninteresting(c);
  241. }
  242. }
  243. }
  244. private void stage(List<ReceiveCommand> cmds, ObjectId id) {
  245. int estLen = txnStage.length() + txnId.length() + 5;
  246. StringBuilder n = new StringBuilder(estLen);
  247. n.append(txnStage).append(txnId).append('.');
  248. n.append(Integer.toHexString(cmds.size()));
  249. cmds.add(new ReceiveCommand(ObjectId.zeroId(), id, n.toString()));
  250. }
  251. }