From e5db7c1f0e7431e68e854dc1152633999cf01555 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Tue, 29 Aug 2017 18:40:03 -0700 Subject: [PATCH] DfsFsck: reduce memory usage during verifyIndex Don't convert a lot of ObjectIds to Strings stored in a generic java.util.HashSet. This is a very expensive way to store objects. Instead rely on "this" from the FsckPackParser to look up information about the objects in this pack file, which lets the verify code avoid sorting the object list. Use ObjectIdOwnerMap, which is the most efficient format JGit has for storing lots of objects. Change-Id: Ib68f93acb4d91b96d0a44c0612f704500d332ac1 --- .../jgit/internal/fsck/FsckPackParser.java | 22 ++++++++++++------- .../jgit/internal/storage/dfs/DfsFsck.java | 2 +- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java index e6ec6814b3..184bf416e0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java @@ -49,7 +49,6 @@ import java.nio.channels.Channels; import java.text.MessageFormat; import java.util.Arrays; import java.util.HashSet; -import java.util.List; import java.util.Set; import java.util.zip.CRC32; @@ -65,6 +64,7 @@ import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.ObjectChecker; import org.eclipse.jgit.lib.ObjectDatabase; +import org.eclipse.jgit.lib.ObjectIdOwnerMap; import org.eclipse.jgit.transport.PackParser; import org.eclipse.jgit.transport.PackedObjectInfo; @@ -265,18 +265,18 @@ public class FsckPackParser extends PackParser { /** * Verify the existing index file with all objects from the pack. 
* - * @param entries - * all the entries that are expected in the index file * @param idx * index file associate with the pack * @throws CorruptPackIndexException * when the index file is corrupt. */ - public void verifyIndex(List entries, PackIndex idx) + public void verifyIndex(PackIndex idx) throws CorruptPackIndexException { - Set all = new HashSet<>(); - for (PackedObjectInfo entry : entries) { - all.add(entry.getName()); + ObjectIdOwnerMap inPack = new ObjectIdOwnerMap<>(); + for (int i = 0; i < getObjectCount(); i++) { + PackedObjectInfo entry = getObject(i); + inPack.add(new ObjFromPack(entry)); + long offset = idx.findOffset(entry); if (offset == -1) { throw new CorruptPackIndexException( @@ -305,7 +305,7 @@ public class FsckPackParser extends PackParser { } for (MutableEntry entry : idx) { - if (!all.contains(entry.name())) { + if (!inPack.contains(entry.toObjectId())) { throw new CorruptPackIndexException(MessageFormat.format( JGitText.get().unknownObjectInIndex, entry.name()), ErrorType.UNKNOWN_OBJ); @@ -323,4 +323,10 @@ public class FsckPackParser extends PackParser { public void overwriteObjectCount(long expectedObjectCount) { this.expectedObjectCount = expectedObjectCount; } + + static class ObjFromPack extends ObjectIdOwnerMap.Entry { + ObjFromPack(AnyObjectId id) { + super(id); + } + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java index 2580047b15..75eade2273 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java @@ -128,7 +128,7 @@ public class DfsFsck { fpp.parse(pm); errors.getCorruptObjects().addAll(fpp.getCorruptObjects()); - fpp.verifyIndex(fpp.getSortedObjectList(null), pack.getPackIndex(ctx)); + fpp.verifyIndex(pack.getPackIndex(ctx)); } private void checkConnectivity(ProgressMonitor pm, FsckError errors) -- 2.39.5