diff options
author | Han-Wen Nienhuys <hanwen@google.com> | 2017-10-09 18:11:04 +0200 |
---|---|---|
committer | Han-Wen Nienhuys <hanwen@google.com> | 2017-10-24 14:49:10 +0200 |
commit | ea2a4e3abec852788c15c89d1637d145c5b2ce52 (patch) | |
tree | 24ffe4f3c0edc3f6f47886e034fa94a1ecd7d788 | |
parent | fbefe1e999d44e4cdb605c238da7d916fada7274 (diff) | |
download | jgit-ea2a4e3abec852788c15c89d1637d145c5b2ce52.tar.gz jgit-ea2a4e3abec852788c15c89d1637d145c5b2ce52.zip |
Introduce RawText#load.
This method creates a RawText from a blob, but avoids reading the blob
if the start contains null bytes. This should reduce the amount of
garbage that Gerrit produces for changes with binaries.
Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Change-Id: Idd202d20251f2d1653e5f1ca374fe644c2cf205f
4 files changed, 271 insertions, 53 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextLoadTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextLoadTest.java new file mode 100644 index 0000000000..a11402f2fe --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextLoadTest.java @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.eclipse.jgit.diff; + +import org.eclipse.jgit.errors.BinaryBlobException; +import org.eclipse.jgit.internal.storage.file.FileRepository; +import org.eclipse.jgit.junit.RepositoryTestCase; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectInserter; +import org.eclipse.jgit.lib.ObjectLoader; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class RawTextLoadTest extends RepositoryTestCase { + private static byte[] generate(int size, int nullAt) { + byte[] data = new byte[size]; + for (int i = 0; i < data.length; i++) { + data[i] = (byte) ((i % 72 == 0) ? '\n' : (i%10) + '0'); + } + if (nullAt >= 0) { + data[nullAt] = '\0'; + } + return data; + } + + private RawText textFor(byte[] data, int limit) throws IOException, BinaryBlobException { + FileRepository repo = createBareRepository(); + ObjectId id; + try (ObjectInserter ins = repo.getObjectDatabase().newInserter()) { + id = ins.insert(Constants.OBJ_BLOB, data); + } + ObjectLoader ldr = repo.open(id); + return RawText.load(ldr, limit); + } + + @Test + public void testSmallOK() throws Exception { + byte[] data = generate(1000, -1); + RawText result = textFor(data, 1 << 20); + Assert.assertArrayEquals(result.content, data); + } + + @Test(expected = BinaryBlobException.class) + public void testSmallNull() throws Exception { + byte[] data = generate(1000, 22); + textFor(data, 1 << 20); + } + + @Test + public void testBigOK() throws Exception { + byte[] data = generate(10000, -1); + RawText result = textFor(data, 1 << 20); + Assert.assertArrayEquals(result.content, data); + } + + @Test(expected = BinaryBlobException.class) + public void testBigWithNullAtStart() throws Exception { + byte[] data = generate(10000, 22); + textFor(data, 1 << 20); + } + + @Test(expected = BinaryBlobException.class) + public void testBinaryThreshold() throws Exception { + byte[] data = generate(2 << 20, -1); + textFor(data, 1 << 20); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index fa7cc0df87..22366a6bd2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -66,9 +66,9 @@ import org.eclipse.jgit.diff.DiffAlgorithm.SupportedAlgorithm; import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.dircache.DirCacheIterator; import org.eclipse.jgit.errors.AmbiguousObjectException; +import org.eclipse.jgit.errors.BinaryBlobException; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; -import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.lib.AbbreviatedObjectId; @@ -113,9 +113,6 @@ public class DiffFormatter implements AutoCloseable { /** Magic return content indicating it is empty or no content present. */ private static final byte[] EMPTY = new byte[] {}; - /** Magic return indicating the content is binary. */ - private static final byte[] BINARY = new byte[] {}; - private final OutputStream out; private ObjectReader reader; @@ -954,47 +951,50 @@ public class DiffFormatter implements AutoCloseable { // Content not changed (e.g. only mode, pure rename) editList = new EditList(); type = PatchType.UNIFIED; + res.header = new FileHeader(buf.toByteArray(), editList, type); + return res; + } - } else { - assertHaveReader(); - - byte[] aRaw, bRaw; + assertHaveReader(); - if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) { - aRaw = writeGitLinkText(ent.getOldId()); - bRaw = writeGitLinkText(ent.getNewId()); - } else { + RawText aRaw = null; + RawText bRaw = null; + if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) { + aRaw = new RawText(writeGitLinkText(ent.getOldId())); + bRaw = new RawText(writeGitLinkText(ent.getNewId())); + } else { + try { aRaw = open(OLD, ent); bRaw = open(NEW, ent); - } - - if (aRaw == BINARY || bRaw == BINARY // - || RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { + } catch (BinaryBlobException e) { + // Do nothing; we check for null below. formatOldNewPaths(buf, ent); buf.write(encodeASCII("Binary files differ\n")); //$NON-NLS-1$ editList = new EditList(); type = PatchType.BINARY; + res.header = new FileHeader(buf.toByteArray(), editList, type); + return res; + } + } - } else { - res.a = new RawText(aRaw); - res.b = new RawText(bRaw); - editList = diff(res.a, res.b); - type = PatchType.UNIFIED; - - switch (ent.getChangeType()) { - case RENAME: - case COPY: - if (!editList.isEmpty()) - formatOldNewPaths(buf, ent); - break; + res.a = aRaw; + res.b = bRaw; + editList = diff(res.a, res.b); + type = PatchType.UNIFIED; - default: + switch (ent.getChangeType()) { + case RENAME: + case COPY: + if (!editList.isEmpty()) formatOldNewPaths(buf, ent); - break; - } - } + break; + + default: + formatOldNewPaths(buf, ent); + break; } + res.header = new FileHeader(buf.toByteArray(), editList, type); return res; } @@ -1009,13 +1009,13 @@ public class DiffFormatter implements AutoCloseable { } } - private byte[] open(DiffEntry.Side side, DiffEntry entry) - throws IOException { + private RawText open(DiffEntry.Side side, DiffEntry entry) + throws IOException, BinaryBlobException { if (entry.getMode(side) == FileMode.MISSING) - return EMPTY; + return RawText.EMPTY_TEXT; if (entry.getMode(side).getObjectType() != Constants.OBJ_BLOB) - return EMPTY; + return RawText.EMPTY_TEXT; AbbreviatedObjectId id = entry.getId(side); if (!id.isComplete()) { @@ -1036,23 +1036,8 @@ public class DiffFormatter implements AutoCloseable { throw new AmbiguousObjectException(id, ids); } - try { - ObjectLoader ldr = source.open(side, entry); - return ldr.getBytes(binaryFileThreshold); - - } catch (LargeObjectException.ExceedsLimit overLimit) { - return BINARY; - - } catch (LargeObjectException.ExceedsByteArrayLimit overLimit) { - return BINARY; - - } catch (LargeObjectException.OutOfMemory tooBig) { - return BINARY; - - } catch (LargeObjectException tooBig) { - tooBig.setObjectId(id.toObjectId()); - throw tooBig; - } + ObjectLoader ldr = source.open(side, entry); + return RawText.load(ldr, binaryFileThreshold); } /** diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index 5bfee753a6..4fae3e478c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -44,11 +44,15 @@ package org.eclipse.jgit.diff; +import java.io.EOFException; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import org.eclipse.jgit.errors.BinaryBlobException; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.util.IO; import org.eclipse.jgit.util.IntList; import org.eclipse.jgit.util.RawParseUtils; @@ -295,4 +299,65 @@ public class RawText extends Sequence { else return "\n"; //$NON-NLS-1$ } + + /** + * Read a blob object into RawText, or throw BinaryBlobException if + * the blob is binary. + * + * @param ldr + * the ObjectLoader for the blob + * @param threshold + * if the blob is larger than this size, it is always assumed to be binary. + * @since 4.10 + * @return the RawText representing the blob. + * @throws BinaryBlobException if the blob contains binary data. + * @throws IOException if the input could not be read. + */ + public static RawText load(ObjectLoader ldr, int threshold) throws IOException, BinaryBlobException { + long sz = ldr.getSize(); + + if (sz > threshold) { + throw new BinaryBlobException(); + } + + if (sz <= FIRST_FEW_BYTES) { + byte[] data = ldr.getCachedBytes(FIRST_FEW_BYTES); + if (isBinary(data)) { + throw new BinaryBlobException(); + } + return new RawText(data); + } + + byte[] head = new byte[FIRST_FEW_BYTES]; + try (InputStream stream = ldr.openStream()) { + int off = 0; + int left = head.length; + while (left > 0) { + int n = stream.read(head, off, left); + if (n < 0) { + throw new EOFException(); + } + left -= n; + + while (n > 0) { + if (head[off] == '\0') { + throw new BinaryBlobException(); + } + off++; + n--; + } + } + + byte data[]; + try { + data = new byte[(int)sz]; + } catch (OutOfMemoryError e) { + throw new LargeObjectException.OutOfMemory(e); + } + + System.arraycopy(head, 0, data, 0, head.length); + IO.readFully(stream, data, off, (int) (sz-off)); + return new RawText(data); + } + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/errors/BinaryBlobException.java b/org.eclipse.jgit/src/org/eclipse/jgit/errors/BinaryBlobException.java new file mode 100644 index 0000000000..a345fe4d59 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/errors/BinaryBlobException.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2017 Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.eclipse.jgit.errors; + +/** + * BinaryBlobException is used to signal that binary data was found + * in a context that requires text (eg. for generating textual diffs). + * + * @since 4.10 + */ +public class BinaryBlobException extends Exception { + private static final long serialVersionUID = 1L; + + /** + * Construct a BinaryBlobException. + */ + public BinaryBlobException() {} +} |