Added a check in Diff to ensure that files that are most likely not text are not diffed line by line. A file is treated as binary if a null character appears within its first 8000 bytes. This is similar to the heuristic that C Git uses. Change-Id: I2b6f05674c88d89b3f549a5db483f850f7f46c26 tags/v0.9.1
@@ -132,16 +132,28 @@ class Diff extends TextBuiltin { | |||
+ (mode1.equals(mode2) ? " " + mode1 : "")); | |||
out.println("--- " + (isNew ? "/dev/null" : name1)); | |||
out.println("+++ " + (isDelete ? "/dev/null" : name2)); | |||
RawText a = getRawText(id1); | |||
RawText b = getRawText(id2); | |||
byte[] aRaw = getRawBytes(id1); | |||
byte[] bRaw = getRawBytes(id2); | |||
if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { | |||
out.println("Binary files differ"); | |||
return; | |||
} | |||
RawText a = getRawText(aRaw); | |||
RawText b = getRawText(bRaw); | |||
MyersDiff diff = new MyersDiff(a, b); | |||
fmt.formatEdits(out, a, b, diff.getEdits()); | |||
} | |||
private RawText getRawText(ObjectId id) throws IOException { | |||
private byte[] getRawBytes(ObjectId id) throws IOException { | |||
if (id.equals(ObjectId.zeroId())) | |||
return new RawText(new byte[] {}); | |||
byte[] raw = db.openBlob(id).getCachedBytes(); | |||
return new byte[] {}; | |||
return db.openBlob(id).getCachedBytes(); | |||
} | |||
private RawText getRawText(byte[] raw) { | |||
if (ignoreWsAll) | |||
return new RawTextIgnoreAllWhitespace(raw); | |||
else if (ignoreWsTrailing) | |||
@@ -154,4 +166,3 @@ class Diff extends TextBuiltin { | |||
return new RawText(raw); | |||
} | |||
} | |||
@@ -65,6 +65,9 @@ import org.eclipse.jgit.util.RawParseUtils; | |||
* they are converting from "line number" to "element index". | |||
*/ | |||
public class RawText implements Sequence { | |||
/** Number of bytes to check for heuristics in {@link #isBinary(byte[])} */ | |||
private static final int FIRST_FEW_BYTES = 8000; | |||
/** The file content for this sequence. */ | |||
protected final byte[] content; | |||
@@ -202,4 +205,22 @@ public class RawText implements Sequence { | |||
hash = (hash << 5) ^ (raw[ptr] & 0xff); | |||
return hash; | |||
} | |||
/** | |||
* Determine heuristically whether a byte array represents binary (as | |||
* opposed to text) content. | |||
* | |||
* @param raw | |||
* the raw file content. | |||
* @return true if raw is likely to be a binary file, false otherwise | |||
*/ | |||
public static boolean isBinary(byte[] raw) { | |||
// Same heuristic as C Git | |||
int size = raw.length > FIRST_FEW_BYTES ? FIRST_FEW_BYTES : raw.length; | |||
for (int ptr = 0; ptr < size; ptr++) | |||
if (raw[ptr] == '\0') | |||
return true; | |||
return false; | |||
} | |||
} |