]> source.dussan.org Git - jgit.git/commitdiff
Improve DiffFormatter text file access 86/1486/1
authorShawn O. Pearce <spearce@spearce.org>
Wed, 1 Sep 2010 03:09:14 +0000 (20:09 -0700)
committerShawn O. Pearce <spearce@spearce.org>
Wed, 1 Sep 2010 17:19:43 +0000 (10:19 -0700)
When we are asked to create a difference between two files the caller
really wants to see that output.  Instead of punting because a file
is too big to process, consider it to be binary.  This reduces the
accuracy of our output display, but makes it a lot more likely that
the formatter can still generate something semi-useful.

We set our default binary threshold to 50 MiB, which is the same
threshold that PackWriter uses before punting and deciding a file
is too big to delta compress.  Anything under this size we try to
load and process, anything over that size (or that won't allocate
in the heap) gets tagged as binary.

Change-Id: I69553c9ef96db7f2058c6210657f1181ce882335
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java

index 52c9573905aa610e48d7e6a3ff42f09adf64a2db..cb145e4b5ab2492f64fbbabc8b6442eb9a51d7b9 100644 (file)
@@ -62,10 +62,10 @@ import org.eclipse.jgit.JGitText;
 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
 import org.eclipse.jgit.errors.AmbiguousObjectException;
 import org.eclipse.jgit.errors.CorruptObjectException;
+import org.eclipse.jgit.errors.LargeObjectException;
 import org.eclipse.jgit.errors.MissingObjectException;
 import org.eclipse.jgit.lib.AbbreviatedObjectId;
 import org.eclipse.jgit.lib.Constants;
-import org.eclipse.jgit.lib.CoreConfig;
 import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectLoader;
@@ -74,6 +74,7 @@ import org.eclipse.jgit.lib.Repository;
 import org.eclipse.jgit.patch.FileHeader;
 import org.eclipse.jgit.patch.HunkHeader;
 import org.eclipse.jgit.patch.FileHeader.PatchType;
+import org.eclipse.jgit.storage.pack.PackConfig;
 import org.eclipse.jgit.util.QuotedString;
 import org.eclipse.jgit.util.io.DisabledOutputStream;
 
@@ -81,8 +82,16 @@ import org.eclipse.jgit.util.io.DisabledOutputStream;
  * Format a Git style patch script.
  */
 public class DiffFormatter {
+       private static final int DEFAULT_BINARY_FILE_THRESHOLD = PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
+
        private static final byte[] noNewLine = encodeASCII("\\ No newline at end of file\n");
 
+       /** Magic return content indicating it is empty or no content present. */
+       private static final byte[] EMPTY = new byte[] {};
+
+       /** Magic return indicating the content is binary. */
+       private static final byte[] BINARY = new byte[] {};
+
        private final OutputStream out;
 
        private Repository db;
@@ -93,7 +102,7 @@ public class DiffFormatter {
 
        private RawText.Factory rawTextFactory = RawText.FACTORY;
 
-       private int bigFileThreshold = 50 * 1024 * 1024;
+       private int binaryFileThreshold = DEFAULT_BINARY_FILE_THRESHOLD;
 
        private String oldPrefix = "a/";
 
@@ -124,9 +133,6 @@ public class DiffFormatter {
         */
        public void setRepository(Repository repository) {
                db = repository;
-
-               CoreConfig cfg = db.getConfig().get(CoreConfig.KEY);
-               bigFileThreshold = cfg.getStreamFileThreshold();
        }
 
        /**
@@ -175,16 +181,17 @@ public class DiffFormatter {
        }
 
        /**
-        * Set the maximum file size that should be considered for diff output.
-        * <p>
-        * Text files that are larger than this size will not have a difference
-        * generated during output.
+        * Set maximum file size for text files.
+        *
+        * Files larger than this size will be treated as though they are binary and
+        * not text. Default is {@value #DEFAULT_BINARY_FILE_THRESHOLD} .
         *
-        * @param bigFileThreshold
-        *            the limit, in bytes.
+        * @param threshold
+        *            the limit, in bytes. Files larger than this size will be
+        *            assumed to be binary, even if they aren't.
         */
-       public void setBigFileThreshold(int bigFileThreshold) {
-               this.bigFileThreshold = bigFileThreshold;
+       public void setBinaryFileThreshold(int threshold) {
+               this.binaryFileThreshold = threshold;
        }
 
        /**
@@ -282,28 +289,6 @@ public class DiffFormatter {
                return ('"' + name + '"').equals(q) ? name : q;
        }
 
-       private byte[] open(ObjectReader reader, FileMode mode,
-                       AbbreviatedObjectId id) throws IOException {
-               if (mode == FileMode.MISSING)
-                       return new byte[] {};
-
-               if (mode.getObjectType() != Constants.OBJ_BLOB)
-                       return new byte[] {};
-
-               if (!id.isComplete()) {
-                       Collection<ObjectId> ids = reader.resolve(id);
-                       if (ids.size() == 1)
-                               id = AbbreviatedObjectId.fromObjectId(ids.iterator().next());
-                       else if (ids.size() == 0)
-                               throw new MissingObjectException(id, Constants.OBJ_BLOB);
-                       else
-                               throw new AmbiguousObjectException(id, ids);
-               }
-
-               ObjectLoader ldr = reader.open(id.toObjectId());
-               return ldr.getCachedBytes(bigFileThreshold);
-       }
-
        /**
         * Format a patch script, reusing a previously parsed FileHeader.
         * <p>
@@ -560,16 +545,24 @@ public class DiffFormatter {
                        if (db == null)
                                throw new IllegalStateException(
                                                JGitText.get().repositoryIsRequired);
+
                        ObjectReader reader = db.newObjectReader();
                        byte[] aRaw, bRaw;
                        try {
-                               aRaw = open(reader, ent.getOldMode(), ent.getOldId());
-                               bRaw = open(reader, ent.getNewMode(), ent.getNewId());
+                               aRaw = open(reader, //
+                                               ent.getOldPath(), //
+                                               ent.getOldMode(), //
+                                               ent.getOldId());
+                               bRaw = open(reader, //
+                                               ent.getNewPath(), //
+                                               ent.getNewMode(), //
+                                               ent.getNewId());
                        } finally {
                                reader.release();
                        }
 
-                       if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
+                       if (aRaw == BINARY || bRaw == BINARY //
+                                       || RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
                                formatOldNewPaths(buf, ent);
                                buf.write(encodeASCII("Binary files differ\n"));
                                editList = new EditList();
@@ -599,6 +592,50 @@ public class DiffFormatter {
                return res;
        }
 
+       private byte[] open(ObjectReader reader, String path, FileMode mode,
+                       AbbreviatedObjectId id) throws IOException {
+               if (mode == FileMode.MISSING)
+                       return EMPTY;
+
+               if (mode.getObjectType() != Constants.OBJ_BLOB)
+                       return EMPTY;
+
+               if (isBinary(path))
+                       return BINARY;
+
+               if (!id.isComplete()) {
+                       Collection<ObjectId> ids = reader.resolve(id);
+                       if (ids.size() == 1)
+                               id = AbbreviatedObjectId.fromObjectId(ids.iterator().next());
+                       else if (ids.size() == 0)
+                               throw new MissingObjectException(id, Constants.OBJ_BLOB);
+                       else
+                               throw new AmbiguousObjectException(id, ids);
+               }
+
+               try {
+                       ObjectLoader ldr = reader.open(id.toObjectId());
+                       return ldr.getBytes(binaryFileThreshold);
+
+               } catch (LargeObjectException.ExceedsLimit overLimit) {
+                       return BINARY;
+
+               } catch (LargeObjectException.ExceedsByteArrayLimit overLimit) {
+                       return BINARY;
+
+               } catch (LargeObjectException.OutOfMemory tooBig) {
+                       return BINARY;
+
+               } catch (LargeObjectException tooBig) {
+                       tooBig.setObjectId(id.toObjectId());
+                       throw tooBig;
+               }
+       }
+
+       private boolean isBinary(String path) {
+               return false;
+       }
+
        private void formatHeader(ByteArrayOutputStream o, DiffEntry ent)
                        throws IOException {
                final ChangeType type = ent.getChangeType();