Browse Source

Throw BinaryBlobException from RawParseUtils#lineMap.

This makes detection of binaries exact for ResolveMerger and
DiffFormatter: they will classify files as binary regardless of where
the '\0' occurs in the text.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Change-Id: Id4342a199628d9406bfa04af1b023c27a47d4014
tags/v4.10.0.201712302008-r
Han-Wen Nienhuys 6 years ago
parent
commit
f2e64cd895

+ 10
- 0
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java View File

@@ -64,6 +64,16 @@ public class RawTextTest {
assertEquals(0, r.size());
}

@Test
public void testBinary() {
	// The second line contains a NUL byte, so the whole buffer is expected
	// to be treated as a single line rather than split at the '\n'.
	String input = "foo-a\nf\0o-b\n";
	byte[] data = Constants.encodeASCII(input);
	final RawText a = new RawText(data);
	// JUnit's contract is assertEquals(expected, actual); keeping that
	// order makes failure messages report the right values.
	assertEquals(data, a.content);
	assertEquals(1, a.size());
	assertEquals(input, a.getString(0, 1, false));
}

@Test
public void testEquals() {
final RawText a = new RawText(Constants.encodeASCII("foo-a\nfoo-b\n"));

+ 9
- 9
org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java View File

@@ -48,45 +48,45 @@ import static org.junit.Assert.assertNotNull;

import java.io.UnsupportedEncodingException;

import org.eclipse.jgit.errors.BinaryBlobException;
import org.junit.Test;

public class RawParseUtils_LineMapTest {
@Test
public void testEmpty() {
public void testEmpty() throws Exception {
final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0);
assertNotNull(map);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map));
}

@Test
public void testOneBlankLine() {
public void testOneBlankLine() throws Exception {
final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map));
}

@Test
public void testTwoLineFooBar() throws UnsupportedEncodingException {
public void testTwoLineFooBar() throws Exception {
final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
}

@Test
public void testTwoLineNoLF() throws UnsupportedEncodingException {
public void testTwoLineNoLF() throws Exception {
final byte[] buf = "foo\nbar".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
}

@Test
public void testBinary() throws UnsupportedEncodingException {
@Test(expected = BinaryBlobException.class)
public void testBinary() throws Exception {
final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 3, buf.length);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map));
RawParseUtils.lineMap(buf, 3, buf.length);
}

@Test
public void testFourLineBlanks() throws UnsupportedEncodingException {
public void testFourLineBlanks() throws Exception {
final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);


+ 25
- 2
org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java View File

@@ -93,7 +93,29 @@ public class RawText extends Sequence {
*/
public RawText(final byte[] input) {
content = input;
lines = RawParseUtils.lineMap(content, 0, content.length);
IntList map;
try {
map = RawParseUtils.lineMap(content, 0, content.length);
} catch (BinaryBlobException e) {
map = new IntList(3);
map.add(Integer.MIN_VALUE);
map.add(0);
map.add(content.length);
}
lines = map;
}

/**
* Construct a new RawText from blob data whose line map has already been
* computed, so the data does not need to be scanned again.
*
* @param data
* the blob data; the array is stored as-is, not copied.
* @param lineMap
* start offset of each line within <code>data</code>, indexed by
* 1-based line number (the layout returned by
* RawParseUtils#lineMap); stored as-is.
*/
private RawText(final byte[] data, final IntList lineMap) {
content = data;
lines = lineMap;
}

/**
@@ -357,7 +379,8 @@ public class RawText extends Sequence {

System.arraycopy(head, 0, data, 0, head.length);
IO.readFully(stream, data, off, (int) (sz-off));
return new RawText(data);
IntList lineMap = RawParseUtils.lineMap(data, 0, data.length);
return new RawText(data, lineMap);
}
}
}

+ 4
- 11
org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java View File

@@ -63,6 +63,7 @@ import java.util.HashMap;
import java.util.Map;

import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.BinaryBlobException;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.PersonIdent;

@@ -618,9 +619,6 @@ public final class RawParseUtils {
* <p>
* The last element (index <code>map.size()-1</code>) always contains
* <code>end</code>.
* <p>
* If the data contains a '\0' anywhere, the whole region is considered binary
* and a LineMap corresponding to a single line is returned.
* </p>
*
* @param buf
@@ -631,10 +629,9 @@ public final class RawParseUtils {
* @param end
* 1 past the end of the content within <code>buf</code>.
* @return a line map indexing the start position of each line.
* @throws BinaryBlobException if any '\0' is found.
*/
public static final IntList lineMap(final byte[] buf, int ptr, int end) {
int start = ptr;

public static final IntList lineMap(final byte[] buf, int ptr, int end) throws BinaryBlobException {
// Experimentally derived from multiple source repositories
// the average number of bytes/line is 36. It's a rough guess
// to initially size our map close to the target.
@@ -647,11 +644,7 @@ public final class RawParseUtils {
}

if (buf[ptr] == '\0') {
// binary data.
map = new IntList(3);
map.add(Integer.MIN_VALUE);
map.add(start);
break;
throw new BinaryBlobException();
}

foundLF = (buf[ptr] == '\n');

Loading…
Cancel
Save