diff options
author | Thomas Wolf <thomas.wolf@paranor.ch> | 2021-10-31 01:35:52 +0200 |
---|---|---|
committer | Thomas Wolf <thomas.wolf@paranor.ch> | 2021-10-31 13:02:04 +0100 |
commit | 83eddaf7fda22ca64e9c3852df67ccef0dacdaf5 (patch) | |
tree | da257cd574e6b83c93bc277a5ea5cad742c0630d /org.eclipse.jgit/src/org/eclipse/jgit/util/io | |
parent | 3444a3be8c8a567f944fd7b81838e615852d787a (diff) | |
download | jgit-83eddaf7fda22ca64e9c3852df67ccef0dacdaf5.tar.gz jgit-83eddaf7fda22ca64e9c3852df67ccef0dacdaf5.zip |
Binary and CR-LF detection: lone CRs -> binary
C git treats as binary not only files containing NUL bytes but also
files containing lone CRs (a CR not immediately followed by an LF). Implement the same detection in JGit.
C git additionally counts printable vs. non-printable characters and
also treats files with non_printable_count > printable_count / 128
as binary. This is not implemented here because such counting probably
only makes sense when looking at the full file or blob content, whereas the
Auto[CR]LF* streams in JGit look only at the first few KiB of a stream
in order not to buffer too much.
For the C git implementation, see [1].
[1] https://github.com/git/git/blob/7e27bd589d/convert.c#L35
Bug: 576971
Change-Id: Ia169b59bdbf1477f32ee2014eeb8406f81d4b1ab
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/util/io')
4 files changed, 19 insertions, 16 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFInputStream.java index 1b03d097b6..cedb159827 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFInputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFInputStream.java @@ -123,7 +123,7 @@ public class AutoCRLFInputStream extends InputStream { return false; } if (detectBinary) { - isBinary = RawText.isBinary(buf, cnt); + isBinary = RawText.isBinary(buf, cnt, cnt < buf.length); detectBinary = false; } ptr = 0; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFOutputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFOutputStream.java index 05e271febd..e638b2de3a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFOutputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFOutputStream.java @@ -122,22 +122,24 @@ public class AutoCRLFOutputStream extends OutputStream { } private int buffer(byte[] b, int off, int len) throws IOException { - if (binbufcnt > binbuf.length) + if (binbufcnt > binbuf.length) { return len; + } int copy = Math.min(binbuf.length - binbufcnt, len); System.arraycopy(b, off, binbuf, binbufcnt, copy); binbufcnt += copy; int remaining = len - copy; - if (remaining > 0) - decideMode(); + if (remaining > 0) { + decideMode(false); + } return remaining; } - private void decideMode() throws IOException { + private void decideMode(boolean complete) throws IOException { if (detectBinary) { - isBinary = RawText.isBinary(binbuf, binbufcnt); + isBinary = RawText.isBinary(binbuf, binbufcnt, complete); if (!isBinary) { - isBinary = RawText.isCrLfText(binbuf, binbufcnt); + isBinary = RawText.isCrLfText(binbuf, binbufcnt, complete); } detectBinary = false; } @@ -149,8 +151,9 @@ public class AutoCRLFOutputStream extends OutputStream { /** {@inheritDoc} */ @Override public void flush() throws IOException { - if (binbufcnt <= 
binbuf.length) - decideMode(); + if (binbufcnt <= binbuf.length) { + decideMode(true); + } buf = -1; out.flush(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java index b6d1848b3a..7db882c074 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java @@ -262,14 +262,14 @@ public class AutoLFInputStream extends InputStream { return false; } if (detectBinary) { - isBinary = RawText.isBinary(buf, cnt); + isBinary = RawText.isBinary(buf, cnt, cnt < buf.length); passAsIs = isBinary; detectBinary = false; if (isBinary && abortIfBinary) { throw new IsBinaryException(); } if (!passAsIs && forCheckout) { - passAsIs = RawText.isCrLfText(buf, cnt); + passAsIs = RawText.isCrLfText(buf, cnt, cnt < buf.length); } } ptr = 0; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java index e08a53f502..a0e9fb68c5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java @@ -146,16 +146,16 @@ public class AutoLFOutputStream extends OutputStream { binbufcnt += copy; int remaining = len - copy; if (remaining > 0) { - decideMode(); + decideMode(false); } return remaining; } - private void decideMode() throws IOException { + private void decideMode(boolean complete) throws IOException { if (detectBinary) { - isBinary = RawText.isBinary(binbuf, binbufcnt); + isBinary = RawText.isBinary(binbuf, binbufcnt, complete); if (!isBinary) { - isBinary = RawText.isCrLfText(binbuf, binbufcnt); + isBinary = RawText.isCrLfText(binbuf, binbufcnt, complete); } detectBinary = false; } @@ -168,7 +168,7 @@ public class AutoLFOutputStream extends OutputStream { @Override public void flush() throws IOException { 
if (binbufcnt <= binbuf.length) { - decideMode(); + decideMode(true); } out.flush(); } |