summaryrefslogtreecommitdiffstats
path: root/org.eclipse.jgit
diff options
context:
space:
mode:
authorTerry Parker <tparker@google.com>2015-10-30 17:47:06 -0700
committerTerry Parker <tparker@google.com>2015-11-03 15:04:37 -0800
commit6a3127b257ae275e43b91d3b733ac2129c0792c3 (patch)
tree192dea8981dab4c50e4e03ef06c61ff3103fd073 /org.eclipse.jgit
parent6d00e007ffa5766baa2b62a7f8c8cc5ac17b9fd0 (diff)
downloadjgit-6a3127b257ae275e43b91d3b733ac2129c0792c3.tar.gz
jgit-6a3127b257ae275e43b91d3b733ac2129c0792c3.zip
[performance] Speed up delta packing
When packing is able to reuse lots of deltas from existing packs, those objects are marked as "doNotAttemptDelta" and do not contribute to DeltaTask's computeTopPaths() "totalWeight" calculation. In the extreme case when all packs are reusable, "totalWeight" will be zero. DeltaTask.partitionTasks() uses "totalWeight" to determine a "weightPerThread" size it uses to set up DeltaTasks. When "totalWeight" is small, partitionTasks() ends up creating a DeltaTask for every unique path. For a large repository, the small "weightPerThread" can result in the creation of >100k tasks (for the MSM 3.10 Linux repository, the count was ~150k). This makes the "task stealing" mechanism in DeltaTask very inefficient, because every attempt to steal work does a linear walk through all tasks, searching for the one with the most work remaining, which is O(N^2) comparisons. For the MSM 3.10 repository when all deltas were reusable, PackWriter.parallelDeltaSearch() took (1615+1633+1458)/3 = 1568 seconds. The error is that DeltaTask treats the weights of objects marked as "doNotAttemptDelta" inconsistently. It ignores the weights when calculating "totalWeight" but uses them when partitioning the tasks. The fix is to also ignore them when partitioning the tasks. With this patch applied, PackWriter.parallelDeltaSearch() on the MSM 3.10 repository when all deltas are reused went from taking 1568 seconds to 62ms (>25k speedup). This patch also fixes a totalWeight initialization error in DeltaTask.computeTopPaths(). Change-Id: I2ae37efa83bca42b0e716266ae6aa9d182e76d9c Signed-off-by: Terry Parker <tparker@google.com>
Diffstat (limited to 'org.eclipse.jgit')
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java23
1 files changed, 15 insertions, 8 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java
index 9c84fb7ad2..076df18800 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java
@@ -145,7 +145,7 @@ final class DeltaTask implements Callable<Object> {
}
s = i = topPaths.get(nextTop++).slice.endIndex;
} else {
- w += list[i++].getWeight();
+ w += getAdjustedWeight(list[i++]);
}
}
@@ -180,8 +180,8 @@ final class DeltaTask implements Callable<Object> {
threads);
int cp = beginIndex;
int ch = list[cp].getPathHash();
- long cw = list[cp].getWeight();
- totalWeight = list[cp].getWeight();
+ long cw = getAdjustedWeight(list[cp]);
+ totalWeight = cw;
for (int i = cp + 1; i < endIndex; i++) {
ObjectToPack o = list[i];
@@ -206,11 +206,9 @@ final class DeltaTask implements Callable<Object> {
ch = o.getPathHash();
cw = 0;
}
- if (o.isEdge() || o.doNotAttemptDelta()) {
- continue;
- }
- cw += o.getWeight();
- totalWeight += o.getWeight();
+ int weight = getAdjustedWeight(o);
+ cw += weight;
+ totalWeight += weight;
}
// Sort by starting index to identify gaps later.
@@ -228,6 +226,15 @@ final class DeltaTask implements Callable<Object> {
}
}
+ private static int getAdjustedWeight(ObjectToPack o) {
+ // Edge objects and those with reused deltas do not need to be
+ // compressed. For compression calculations, ignore their weights.
+ if (o.isEdge() || o.doNotAttemptDelta()) {
+ return 0;
+ }
+ return o.getWeight();
+ }
+
static final class WeightedPath implements Comparable<WeightedPath> {
final long weight;
final Slice slice;