You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ReftableCompactor.java 6.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. /*
  2. * Copyright (C) 2017, Google Inc. and others
  3. *
  4. * This program and the accompanying materials are made available under the
  5. * terms of the Eclipse Distribution License v. 1.0 which is available at
  6. * https://www.eclipse.org/org/documents/edl-v10.php.
  7. *
  8. * SPDX-License-Identifier: BSD-3-Clause
  9. */
  10. package org.eclipse.jgit.internal.storage.reftable;
  11. import java.io.IOException;
  12. import java.io.OutputStream;
  13. import java.util.ArrayDeque;
  14. import java.util.ArrayList;
  15. import java.util.List;
  16. import org.eclipse.jgit.internal.storage.reftable.ReftableWriter.Stats;
  17. import org.eclipse.jgit.lib.PersonIdent;
  18. import org.eclipse.jgit.lib.ReflogEntry;
  19. /**
  20. * Merges reftables and compacts them into a single output.
  21. * <p>
  22. * For a partial compaction callers should {@link #setIncludeDeletes(boolean)}
  23. * to {@code true} to ensure the new reftable continues to use a delete marker
  24. * to shadow any lower reftable that may have the reference present.
  25. * <p>
  26. * By default all log entries within the range defined by
  27. * {@link #setReflogExpireMinUpdateIndex(long)} and {@link #setReflogExpireMaxUpdateIndex(long)} are
  28. * copied, even if no references in the output file match the log records.
  29. * Callers may truncate the log to a more recent time horizon with
  30. * {@link #setReflogExpireOldestReflogTimeMillis(long)}, or disable the log altogether with
  31. * {@code setOldestReflogTimeMillis(Long.MAX_VALUE)}.
  32. */
  33. public class ReftableCompactor {
  34. private final ReftableWriter writer;
  35. private final ArrayDeque<ReftableReader> tables = new ArrayDeque<>();
  36. private boolean includeDeletes;
  37. private long reflogExpireMinUpdateIndex = 0;
  38. private long reflogExpireMaxUpdateIndex = Long.MAX_VALUE;
  39. private long reflogExpireOldestReflogTimeMillis;
  40. private Stats stats;
  41. /**
  42. * Creates a new compactor.
  43. *
  44. * @param out
  45. * stream to write the compacted tables to. Caller is responsible
  46. * for closing {@code out}.
  47. */
  48. public ReftableCompactor(OutputStream out) {
  49. writer = new ReftableWriter(out);
  50. }
  51. /**
  52. * Set configuration for the reftable.
  53. *
  54. * @param cfg
  55. * configuration for the reftable.
  56. * @return {@code this}
  57. */
  58. public ReftableCompactor setConfig(ReftableConfig cfg) {
  59. writer.setConfig(cfg);
  60. return this;
  61. }
  62. /**
  63. * Whether to include deletions in the output, which may be necessary for
  64. * partial compaction.
  65. *
  66. * @param deletes
  67. * {@code true} to include deletions in the output, which may be
  68. * necessary for partial compaction.
  69. * @return {@code this}
  70. */
  71. public ReftableCompactor setIncludeDeletes(boolean deletes) {
  72. includeDeletes = deletes;
  73. return this;
  74. }
  75. /**
  76. * Set the minimum update index for log entries that appear in the compacted
  77. * reftable.
  78. *
  79. * @param min
  80. * the minimum update index for log entries that appear in the
  81. * compacted reftable. This should be 1 higher than the prior
  82. * reftable's {@code maxUpdateIndex} if this table will be used
  83. * in a stack.
  84. * @return {@code this}
  85. */
  86. public ReftableCompactor setReflogExpireMinUpdateIndex(long min) {
  87. reflogExpireMinUpdateIndex = min;
  88. return this;
  89. }
  90. /**
  91. * Set the maximum update index for log entries that appear in the compacted
  92. * reftable.
  93. *
  94. * @param max
  95. * the maximum update index for log entries that appear in the
  96. * compacted reftable. This should be at least 1 higher than the
  97. * prior reftable's {@code maxUpdateIndex} if this table will be
  98. * used in a stack.
  99. * @return {@code this}
  100. */
  101. public ReftableCompactor setReflogExpireMaxUpdateIndex(long max) {
  102. reflogExpireMaxUpdateIndex = max;
  103. return this;
  104. }
  105. /**
  106. * Set oldest reflog time to preserve.
  107. *
  108. * @param timeMillis
  109. * oldest log time to preserve. Entries whose timestamps are
  110. * {@code >= timeMillis} will be copied into the output file. Log
  111. * entries that predate {@code timeMillis} will be discarded.
  112. * Specified in Java standard milliseconds since the epoch.
  113. * @return {@code this}
  114. */
  115. public ReftableCompactor setReflogExpireOldestReflogTimeMillis(long timeMillis) {
  116. reflogExpireOldestReflogTimeMillis = timeMillis;
  117. return this;
  118. }
  119. /**
  120. * Add all of the tables, in the specified order.
  121. *
  122. * @param readers
  123. * tables to compact. Tables should be ordered oldest first/most
  124. * recent last so that the more recent tables can shadow the
  125. * older results. Caller is responsible for closing the readers.
  126. * @throws java.io.IOException
  127. * update indexes of a reader cannot be accessed.
  128. */
  129. public void addAll(List<ReftableReader> readers) throws IOException {
  130. for (ReftableReader r : readers) {
  131. tables.add(r);
  132. }
  133. }
  134. /**
  135. * Write a compaction to {@code out}.
  136. *
  137. * @throws java.io.IOException
  138. * if tables cannot be read, or cannot be written.
  139. */
  140. public void compact() throws IOException {
  141. MergedReftable mr = new MergedReftable(new ArrayList<>(tables));
  142. mr.setIncludeDeletes(includeDeletes);
  143. writer.setMaxUpdateIndex(mr.maxUpdateIndex());
  144. writer.setMinUpdateIndex(mr.minUpdateIndex());
  145. writer.begin();
  146. mergeRefs(mr);
  147. mergeLogs(mr);
  148. writer.finish();
  149. stats = writer.getStats();
  150. }
  151. /**
  152. * Get statistics of the last written reftable.
  153. *
  154. * @return statistics of the last written reftable.
  155. */
  156. public Stats getStats() {
  157. return stats;
  158. }
  159. private void mergeRefs(MergedReftable mr) throws IOException {
  160. try (RefCursor rc = mr.allRefs()) {
  161. while (rc.next()) {
  162. writer.writeRef(rc.getRef(), rc.getRef().getUpdateIndex());
  163. }
  164. }
  165. }
  166. private void mergeLogs(MergedReftable mr) throws IOException {
  167. if (reflogExpireOldestReflogTimeMillis == Long.MAX_VALUE) {
  168. return;
  169. }
  170. try (LogCursor lc = mr.allLogs()) {
  171. while (lc.next()) {
  172. long updateIndex = lc.getUpdateIndex();
  173. if (updateIndex > reflogExpireMaxUpdateIndex || updateIndex < reflogExpireMinUpdateIndex) {
  174. continue;
  175. }
  176. String refName = lc.getRefName();
  177. ReflogEntry log = lc.getReflogEntry();
  178. if (log == null) {
  179. if (includeDeletes) {
  180. writer.deleteLog(refName, updateIndex);
  181. }
  182. continue;
  183. }
  184. PersonIdent who = log.getWho();
  185. if (who.getWhen().getTime() >= reflogExpireOldestReflogTimeMillis) {
  186. writer.writeLog(
  187. refName,
  188. updateIndex,
  189. who,
  190. log.getOldId(),
  191. log.getNewId(),
  192. log.getComment());
  193. }
  194. }
  195. }
  196. }
  197. }