From 83eddaf7fda22ca64e9c3852df67ccef0dacdaf5 Mon Sep 17 00:00:00 2001
From: Thomas Wolf
Date: Sun, 31 Oct 2021 01:35:52 +0200
Subject: Binary and CR-LF detection: lone CRs -> binary

C git considers not only files containing NUL bytes as binary but also
files containing lone CRs. Implement this also for JGit.

C git additionally counts printable vs. non-printable characters and
considers files that have non_printable_count > printable_count / 128
also as binary. This is not implemented because such counting probably
only makes sense if one looks at the full file or blob content. The
Auto[CR]LF* streams in JGit look only at the first few KiB of a stream
in order not to buffer too much.

For the C git implementation, see [1].

[1] https://github.com/git/git/blob/7e27bd589d/convert.c#L35

Bug: 576971
Change-Id: Ia169b59bdbf1477f32ee2014eeb8406f81d4b1ab
Signed-off-by: Thomas Wolf
---
 .../org/eclipse/jgit/util/io/AutoCRLFInputStream.java | 2 +-
 .../eclipse/jgit/util/io/AutoCRLFOutputStream.java | 19 +++++++++++--------
 .../org/eclipse/jgit/util/io/AutoLFInputStream.java | 4 ++--
 .../org/eclipse/jgit/util/io/AutoLFOutputStream.java | 10 +++++-----
 4 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'org.eclipse.jgit/src/org/eclipse/jgit/util/io')

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFInputStream.java
index 1b03d097b6..cedb159827 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFInputStream.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFInputStream.java
@@ -123,7 +123,7 @@ public class AutoCRLFInputStream extends InputStream {
 return false;
 }
 if (detectBinary) {
- isBinary = RawText.isBinary(buf, cnt);
+ isBinary = RawText.isBinary(buf, cnt, cnt < buf.length);
 detectBinary = false;
 }
 ptr = 0;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFOutputStream.java
b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFOutputStream.java index 05e271febd..e638b2de3a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFOutputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFOutputStream.java @@ -122,22 +122,24 @@ public class AutoCRLFOutputStream extends OutputStream { } private int buffer(byte[] b, int off, int len) throws IOException { - if (binbufcnt > binbuf.length) + if (binbufcnt > binbuf.length) { return len; + } int copy = Math.min(binbuf.length - binbufcnt, len); System.arraycopy(b, off, binbuf, binbufcnt, copy); binbufcnt += copy; int remaining = len - copy; - if (remaining > 0) - decideMode(); + if (remaining > 0) { + decideMode(false); + } return remaining; } - private void decideMode() throws IOException { + private void decideMode(boolean complete) throws IOException { if (detectBinary) { - isBinary = RawText.isBinary(binbuf, binbufcnt); + isBinary = RawText.isBinary(binbuf, binbufcnt, complete); if (!isBinary) { - isBinary = RawText.isCrLfText(binbuf, binbufcnt); + isBinary = RawText.isCrLfText(binbuf, binbufcnt, complete); } detectBinary = false; } @@ -149,8 +151,9 @@ public class AutoCRLFOutputStream extends OutputStream { /** {@inheritDoc} */ @Override public void flush() throws IOException { - if (binbufcnt <= binbuf.length) - decideMode(); + if (binbufcnt <= binbuf.length) { + decideMode(true); + } buf = -1; out.flush(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java index b6d1848b3a..7db882c074 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java @@ -262,14 +262,14 @@ public class AutoLFInputStream extends InputStream { return false; } if (detectBinary) { - isBinary = RawText.isBinary(buf, cnt); + isBinary = RawText.isBinary(buf, cnt, cnt < buf.length); passAsIs 
= isBinary; detectBinary = false; if (isBinary && abortIfBinary) { throw new IsBinaryException(); } if (!passAsIs && forCheckout) { - passAsIs = RawText.isCrLfText(buf, cnt); + passAsIs = RawText.isCrLfText(buf, cnt, cnt < buf.length); } } ptr = 0; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java index e08a53f502..a0e9fb68c5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java @@ -146,16 +146,16 @@ public class AutoLFOutputStream extends OutputStream { binbufcnt += copy; int remaining = len - copy; if (remaining > 0) { - decideMode(); + decideMode(false); } return remaining; } - private void decideMode() throws IOException { + private void decideMode(boolean complete) throws IOException { if (detectBinary) { - isBinary = RawText.isBinary(binbuf, binbufcnt); + isBinary = RawText.isBinary(binbuf, binbufcnt, complete); if (!isBinary) { - isBinary = RawText.isCrLfText(binbuf, binbufcnt); + isBinary = RawText.isCrLfText(binbuf, binbufcnt, complete); } detectBinary = false; } @@ -168,7 +168,7 @@ public class AutoLFOutputStream extends OutputStream { @Override public void flush() throws IOException { if (binbufcnt <= binbuf.length) { - decideMode(); + decideMode(true); } out.flush(); } -- cgit v1.2.3