summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHan-Wen Nienhuys <hanwen@google.com>2017-10-09 18:11:04 +0200
committerHan-Wen Nienhuys <hanwen@google.com>2017-10-24 14:49:10 +0200
commitea2a4e3abec852788c15c89d1637d145c5b2ce52 (patch)
tree24ffe4f3c0edc3f6f47886e034fa94a1ecd7d788
parentfbefe1e999d44e4cdb605c238da7d916fada7274 (diff)
downloadjgit-ea2a4e3abec852788c15c89d1637d145c5b2ce52.tar.gz
jgit-ea2a4e3abec852788c15c89d1637d145c5b2ce52.zip
Introduce RawText#load.
This method creates a RawText from a blob, but avoids reading the blob if the start contains null bytes. This should reduce the amount of garbage that Gerrit produces for changes with binaries. Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Change-Id: Idd202d20251f2d1653e5f1ca374fe644c2cf205f
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextLoadTest.java110
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java91
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java65
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/errors/BinaryBlobException.java58
4 files changed, 271 insertions, 53 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextLoadTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextLoadTest.java
new file mode 100644
index 0000000000..a11402f2fe
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextLoadTest.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2017, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.eclipse.jgit.diff;
+
+import org.eclipse.jgit.errors.BinaryBlobException;
+import org.eclipse.jgit.internal.storage.file.FileRepository;
+import org.eclipse.jgit.junit.RepositoryTestCase;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectInserter;
+import org.eclipse.jgit.lib.ObjectLoader;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+
+public class RawTextLoadTest extends RepositoryTestCase {
+ private static byte[] generate(int size, int nullAt) {
+ byte[] data = new byte[size];
+ for (int i = 0; i < data.length; i++) {
+ data[i] = (byte) ((i % 72 == 0) ? '\n' : (i%10) + '0');
+ }
+ if (nullAt >= 0) {
+ data[nullAt] = '\0';
+ }
+ return data;
+ }
+
+ private RawText textFor(byte[] data, int limit) throws IOException, BinaryBlobException {
+ FileRepository repo = createBareRepository();
+ ObjectId id;
+ try (ObjectInserter ins = repo.getObjectDatabase().newInserter()) {
+ id = ins.insert(Constants.OBJ_BLOB, data);
+ }
+ ObjectLoader ldr = repo.open(id);
+ return RawText.load(ldr, limit);
+ }
+
+ @Test
+ public void testSmallOK() throws Exception {
+ byte[] data = generate(1000, -1);
+ RawText result = textFor(data, 1 << 20);
+ Assert.assertArrayEquals(result.content, data);
+ }
+
+ @Test(expected = BinaryBlobException.class)
+ public void testSmallNull() throws Exception {
+ byte[] data = generate(1000, 22);
+ textFor(data, 1 << 20);
+ }
+
+ @Test
+ public void testBigOK() throws Exception {
+ byte[] data = generate(10000, -1);
+ RawText result = textFor(data, 1 << 20);
+ Assert.assertArrayEquals(result.content, data);
+ }
+
+ @Test(expected = BinaryBlobException.class)
+ public void testBigWithNullAtStart() throws Exception {
+ byte[] data = generate(10000, 22);
+ textFor(data, 1 << 20);
+ }
+
+ @Test(expected = BinaryBlobException.class)
+ public void testBinaryThreshold() throws Exception {
+ byte[] data = generate(2 << 20, -1);
+ textFor(data, 1 << 20);
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java
index fa7cc0df87..22366a6bd2 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java
@@ -66,9 +66,9 @@ import org.eclipse.jgit.diff.DiffAlgorithm.SupportedAlgorithm;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.dircache.DirCacheIterator;
import org.eclipse.jgit.errors.AmbiguousObjectException;
+import org.eclipse.jgit.errors.BinaryBlobException;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
-import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
@@ -113,9 +113,6 @@ public class DiffFormatter implements AutoCloseable {
/** Magic return content indicating it is empty or no content present. */
private static final byte[] EMPTY = new byte[] {};
- /** Magic return indicating the content is binary. */
- private static final byte[] BINARY = new byte[] {};
-
private final OutputStream out;
private ObjectReader reader;
@@ -954,47 +951,50 @@ public class DiffFormatter implements AutoCloseable {
// Content not changed (e.g. only mode, pure rename)
editList = new EditList();
type = PatchType.UNIFIED;
+ res.header = new FileHeader(buf.toByteArray(), editList, type);
+ return res;
+ }
- } else {
- assertHaveReader();
-
- byte[] aRaw, bRaw;
+ assertHaveReader();
- if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
- aRaw = writeGitLinkText(ent.getOldId());
- bRaw = writeGitLinkText(ent.getNewId());
- } else {
+ RawText aRaw = null;
+ RawText bRaw = null;
+ if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
+ aRaw = new RawText(writeGitLinkText(ent.getOldId()));
+ bRaw = new RawText(writeGitLinkText(ent.getNewId()));
+ } else {
+ try {
aRaw = open(OLD, ent);
bRaw = open(NEW, ent);
- }
-
- if (aRaw == BINARY || bRaw == BINARY //
- || RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
+ } catch (BinaryBlobException e) {
+ // Do nothing; we check for null below.
formatOldNewPaths(buf, ent);
buf.write(encodeASCII("Binary files differ\n")); //$NON-NLS-1$
editList = new EditList();
type = PatchType.BINARY;
+ res.header = new FileHeader(buf.toByteArray(), editList, type);
+ return res;
+ }
+ }
- } else {
- res.a = new RawText(aRaw);
- res.b = new RawText(bRaw);
- editList = diff(res.a, res.b);
- type = PatchType.UNIFIED;
-
- switch (ent.getChangeType()) {
- case RENAME:
- case COPY:
- if (!editList.isEmpty())
- formatOldNewPaths(buf, ent);
- break;
+ res.a = aRaw;
+ res.b = bRaw;
+ editList = diff(res.a, res.b);
+ type = PatchType.UNIFIED;
- default:
+ switch (ent.getChangeType()) {
+ case RENAME:
+ case COPY:
+ if (!editList.isEmpty())
formatOldNewPaths(buf, ent);
- break;
- }
- }
+ break;
+
+ default:
+ formatOldNewPaths(buf, ent);
+ break;
}
+
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
@@ -1009,13 +1009,13 @@ public class DiffFormatter implements AutoCloseable {
}
}
- private byte[] open(DiffEntry.Side side, DiffEntry entry)
- throws IOException {
+ private RawText open(DiffEntry.Side side, DiffEntry entry)
+ throws IOException, BinaryBlobException {
if (entry.getMode(side) == FileMode.MISSING)
- return EMPTY;
+ return RawText.EMPTY_TEXT;
if (entry.getMode(side).getObjectType() != Constants.OBJ_BLOB)
- return EMPTY;
+ return RawText.EMPTY_TEXT;
AbbreviatedObjectId id = entry.getId(side);
if (!id.isComplete()) {
@@ -1036,23 +1036,8 @@ public class DiffFormatter implements AutoCloseable {
throw new AmbiguousObjectException(id, ids);
}
- try {
- ObjectLoader ldr = source.open(side, entry);
- return ldr.getBytes(binaryFileThreshold);
-
- } catch (LargeObjectException.ExceedsLimit overLimit) {
- return BINARY;
-
- } catch (LargeObjectException.ExceedsByteArrayLimit overLimit) {
- return BINARY;
-
- } catch (LargeObjectException.OutOfMemory tooBig) {
- return BINARY;
-
- } catch (LargeObjectException tooBig) {
- tooBig.setObjectId(id.toObjectId());
- throw tooBig;
- }
+ ObjectLoader ldr = source.open(side, entry);
+ return RawText.load(ldr, binaryFileThreshold);
}
/**
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
index 5bfee753a6..4fae3e478c 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
@@ -44,11 +44,15 @@
package org.eclipse.jgit.diff;
+import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import org.eclipse.jgit.errors.BinaryBlobException;
+import org.eclipse.jgit.errors.LargeObjectException;
+import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.IntList;
import org.eclipse.jgit.util.RawParseUtils;
@@ -295,4 +299,65 @@ public class RawText extends Sequence {
else
return "\n"; //$NON-NLS-1$
}
+
+ /**
+ * Read a blob object into RawText, or throw BinaryBlobException if
+ * the blob is binary.
+ *
+ * @param ldr
+ * the ObjectLoader for the blob
+ * @param threshold
+ * if the blob is larger than this size, it is always assumed to be binary.
+ * @since 4.10
+ * @return the RawText representing the blob.
+ * @throws BinaryBlobException if the blob contains binary data.
+ * @throws IOException if the input could not be read.
+ */
+ public static RawText load(ObjectLoader ldr, int threshold) throws IOException, BinaryBlobException {
+ long sz = ldr.getSize();
+
+ if (sz > threshold) {
+ throw new BinaryBlobException();
+ }
+
+ if (sz <= FIRST_FEW_BYTES) {
+ byte[] data = ldr.getCachedBytes(FIRST_FEW_BYTES);
+ if (isBinary(data)) {
+ throw new BinaryBlobException();
+ }
+ return new RawText(data);
+ }
+
+ byte[] head = new byte[FIRST_FEW_BYTES];
+ try (InputStream stream = ldr.openStream()) {
+ int off = 0;
+ int left = head.length;
+ while (left > 0) {
+ int n = stream.read(head, off, left);
+ if (n < 0) {
+ throw new EOFException();
+ }
+ left -= n;
+
+ while (n > 0) {
+ if (head[off] == '\0') {
+ throw new BinaryBlobException();
+ }
+ off++;
+ n--;
+ }
+ }
+
+ byte data[];
+ try {
+ data = new byte[(int)sz];
+ } catch (OutOfMemoryError e) {
+ throw new LargeObjectException.OutOfMemory(e);
+ }
+
+ System.arraycopy(head, 0, data, 0, head.length);
+ IO.readFully(stream, data, off, (int) (sz-off));
+ return new RawText(data);
+ }
+ }
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/errors/BinaryBlobException.java b/org.eclipse.jgit/src/org/eclipse/jgit/errors/BinaryBlobException.java
new file mode 100644
index 0000000000..a345fe4d59
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/errors/BinaryBlobException.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2017 Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.eclipse.jgit.errors;
+
+/**
+ * BinaryBlobException is used to signal that binary data was found
+ * in a context that requires text (eg. for generating textual diffs).
+ *
+ * @since 4.10
+ */
+public class BinaryBlobException extends Exception {
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Construct a BinaryBlobException.
+ */
+ public BinaryBlobException() {}
+}