From bb1956e647d2f48ef36ef98a7f22120fdad0b37c Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Fri, 18 Mar 2011 07:27:41 -0700 Subject: [PATCH] PackWriter: Collect stats by object type Frequently enough I'm wondering how much of a pack is commits vs. trees, and the total line doesn't really tell us this because it's a gross total from the pack. Computing the counts per object type is simple during packing, as PackWriter already has everything in memory broken up by object type. It's virtually free to get these values and track them. Change-Id: Id5e6b1902ea909c72f103a0fbca5d8bc316f9ab3 Signed-off-by: Shawn O. Pearce --- .../eclipse/jgit/storage/pack/PackWriter.java | 128 ++++++++++++++++-- 1 file changed, 120 insertions(+), 8 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java index eaa866348e..9c247d0101 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java @@ -168,6 +168,8 @@ public class PackWriter { private final Statistics stats; + private Statistics.ObjectType typeStats; + private List sortedByName; private byte packcsum[]; @@ -650,14 +652,17 @@ public class PackWriter { writeMonitor.beginTask(JGitText.get().writingObjects, (int) objCnt); long writeStart = System.currentTimeMillis(); - long headerStart = out.length(); out.writeFileHeader(PACK_VERSION_GENERATED, objCnt); out.flush(); - long headerEnd = out.length(); writeObjects(out); - if (!edgeObjects.isEmpty() || !cachedPacks.isEmpty()) - stats.thinPackBytes = out.length() - (headerEnd - headerStart); + if (!edgeObjects.isEmpty() || !cachedPacks.isEmpty()) { + for (Statistics.ObjectType typeStat : stats.objectTypes) { + if (typeStat == null) + continue; + stats.thinPackBytes += typeStat.bytes; + } + } for (CachedPack pack : cachedPacks) { long deltaCnt = pack.getDeltaCount(); @@ -672,6 +677,16 @@ 
public class PackWriter { stats.totalBytes = out.length(); stats.reusedPacks = Collections.unmodifiableList(cachedPacks); + for (Statistics.ObjectType typeStat : stats.objectTypes) { + if (typeStat == null) + continue; + typeStat.cntDeltas += typeStat.reusedDeltas; + + stats.reusedObjects += typeStat.reusedObjects; + stats.reusedDeltas += typeStat.reusedDeltas; + stats.totalDeltas += typeStat.cntDeltas; + } + reader.release(); writeMonitor.endTask(); } @@ -1015,12 +1030,21 @@ public class PackWriter { private void writeObjects(PackOutputStream out, List list) throws IOException { + if (list.isEmpty()) + return; + + typeStats = stats.objectTypes[list.get(0).getType()]; + long beginOffset = out.length(); + if (reuseSupport != null) { reuseSupport.writeObjects(out, list); } else { for (ObjectToPack otp : list) out.writeObject(otp); } + + typeStats.bytes += out.length() - beginOffset; + typeStats.cntObjects = list.size(); } void writeObject(PackOutputStream out, ObjectToPack otp) throws IOException { @@ -1039,10 +1063,10 @@ public class PackWriter { reuseSupport.copyObjectAsIs(out, otp, reuseValidate); out.endObject(); otp.setCRC(out.getCRC32()); - stats.reusedObjects++; + typeStats.reusedObjects++; if (otp.isDeltaRepresentation()) { - stats.totalDeltas++; - stats.reusedDeltas++; + typeStats.reusedDeltas++; + typeStats.deltaBytes += out.length() - otp.getOffset(); } return; } catch (StoredObjectRepresentationNotAvailableException gone) { @@ -1138,7 +1162,8 @@ public class PackWriter { DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater); delta.writeTo(dst, null); dst.finish(); - stats.totalDeltas++; + typeStats.cntDeltas++; + typeStats.deltaBytes += out.length() - otp.getOffset(); } private TemporaryBuffer.Heap delta(final ObjectToPack otp) @@ -1579,6 +1604,74 @@ public class PackWriter { /** Summary of how PackWriter created the pack. */ public static class Statistics { + /** Statistics about a single class of object. 
*/ + public static class ObjectType { + long cntObjects; + + long cntDeltas; + + long reusedObjects; + + long reusedDeltas; + + long bytes; + + long deltaBytes; + + /** + * @return total number of objects output. This total includes the + * value of {@link #getDeltas()}. + */ + public long getObjects() { + return cntObjects; + } + + /** + * @return total number of deltas output. This may be lower than the + * actual number of deltas if a cached pack was reused. + */ + public long getDeltas() { + return cntDeltas; + } + + /** + * @return number of objects whose existing representation was + * reused in the output. This count includes + * {@link #getReusedDeltas()}. + */ + public long getReusedObjects() { + return reusedObjects; + } + + /** + * @return number of deltas whose existing representation was reused + * in the output, as their base object was also output or + * was assumed present for a thin pack. This may be lower + * than the actual number of reused deltas if a cached pack + * was reused. + */ + public long getReusedDeltas() { + return reusedDeltas; + } + + /** + * @return total number of bytes written. This size includes the + * object headers as well as the compressed data. This size + * also includes all of {@link #getDeltaBytes()}. + */ + public long getBytes() { + return bytes; + } + + /** + * @return number of delta bytes written. This size includes the + * object headers for the delta objects. + */ + public long getDeltaBytes() { + return deltaBytes; + } + } + Set interestingObjects; Set uninterestingObjects; @@ -1611,6 +1704,16 @@ public class PackWriter { long timeWriting; + ObjectType[] objectTypes; + + { + objectTypes = new ObjectType[5]; + objectTypes[Constants.OBJ_COMMIT] = new ObjectType(); + objectTypes[Constants.OBJ_TREE] = new ObjectType(); + objectTypes[Constants.OBJ_BLOB] = new ObjectType(); + objectTypes[Constants.OBJ_TAG] = new ObjectType(); + } + /** * @return unmodifiable collection of objects to be included in the * pack. 
May be null if the pack was hand-crafted in a unit @@ -1708,6 +1811,15 @@ public class PackWriter { return thinPackBytes; } + /** + * @param typeCode + * object type code, e.g. OBJ_COMMIT or OBJ_TREE. + * @return information about this type of object in the pack. + */ + public ObjectType byObjectType(int typeCode) { + return objectTypes[typeCode]; + } + /** * @return time in milliseconds spent enumerating the objects that need * to be included in the output. This time includes any restarts -- 2.39.5