diff options
author | Thomas Wolf <thomas.wolf@paranor.ch> | 2021-10-31 01:35:52 +0200 |
---|---|---|
committer | Thomas Wolf <thomas.wolf@paranor.ch> | 2021-10-31 13:02:04 +0100 |
commit | 83eddaf7fda22ca64e9c3852df67ccef0dacdaf5 (patch) | |
tree | da257cd574e6b83c93bc277a5ea5cad742c0630d /org.eclipse.jgit.test/tst/org/eclipse | |
parent | 3444a3be8c8a567f944fd7b81838e615852d787a (diff) | |
download | jgit-83eddaf7fda22ca64e9c3852df67ccef0dacdaf5.tar.gz jgit-83eddaf7fda22ca64e9c3852df67ccef0dacdaf5.zip |
Binary and CR-LF detection: lone CRs -> binary
C git considers a file binary not only if it contains NUL bytes but also
if it contains lone CRs (a CR not followed by an LF). Implement the same for JGit.
C git additionally counts printable vs. non-printable characters and
also considers a file binary if non_printable_count > printable_count / 128.
This heuristic is not implemented because such counting probably
only makes sense if one looks at the full file or blob content, whereas the
Auto[CR]LF* streams in JGit look only at the first few KiB of a stream
in order not to buffer too much.
For the C git implementation, see [1].
[1] https://github.com/git/git/blob/7e27bd589d/convert.c#L35
Bug: 576971
Change-Id: Ia169b59bdbf1477f32ee2014eeb8406f81d4b1ab
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Diffstat (limited to 'org.eclipse.jgit.test/tst/org/eclipse')
3 files changed, 53 insertions, 40 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/EolStreamTypeUtilTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/EolStreamTypeUtilTest.java index 673aa1e9c0..f8a6632918 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/EolStreamTypeUtilTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/EolStreamTypeUtilTest.java @@ -81,7 +81,8 @@ public class EolStreamTypeUtilTest { testCheckout(TEXT_CRLF, AUTO_CRLF, "\n", "\r\n"); testCheckout(TEXT_CRLF, AUTO_CRLF, "\r\n", "\r\n"); - testCheckout(TEXT_CRLF, AUTO_CRLF, "\n\r", "\r\n\r"); + testCheckout(TEXT_CRLF, null, "\n\r", "\r\n\r"); + testCheckout(null, AUTO_CRLF, "\n\r", "\n\r"); // Lone CR testCheckout(null, AUTO_CRLF, "\n\r\n", "\n\r\n"); testCheckout(TEXT_CRLF, null, "\n\r\n", "\r\n\r\n"); @@ -89,7 +90,8 @@ public class EolStreamTypeUtilTest { testCheckout(TEXT_CRLF, AUTO_CRLF, "a\nb\n", "a\r\nb\r\n"); testCheckout(TEXT_CRLF, AUTO_CRLF, "a\rb\r", "a\rb\r"); - testCheckout(TEXT_CRLF, AUTO_CRLF, "a\n\rb\n\r", "a\r\n\rb\r\n\r"); + testCheckout(TEXT_CRLF, null, "a\n\rb\n\r", "a\r\n\rb\r\n\r"); + testCheckout(null, AUTO_CRLF, "a\n\rb\n\r", "a\n\rb\n\r"); // Lone CR testCheckout(TEXT_CRLF, AUTO_CRLF, "a\r\nb\r\n", "a\r\nb\r\n"); } @@ -199,7 +201,8 @@ public class EolStreamTypeUtilTest { testCheckin(TEXT_LF, AUTO_LF, "\n\r", "\n\r"); testCheckin(TEXT_LF, AUTO_LF, "\n\r\n", "\n\n"); - testCheckin(TEXT_LF, AUTO_LF, "\r\n\r", "\n\r"); + testCheckin(TEXT_LF, null, "\r\n\r", "\n\r"); + testCheckin(null, AUTO_LF, "\r\n\r", "\r\n\r"); // Lone CR testCheckin(TEXT_LF, AUTO_LF, "a\nb\n", "a\nb\n"); testCheckin(TEXT_LF, AUTO_LF, "a\rb\r", "a\rb\r"); @@ -214,14 +217,16 @@ public class EolStreamTypeUtilTest { testCheckin(TEXT_CRLF, AUTO_CRLF, "\n", "\r\n"); testCheckin(TEXT_CRLF, AUTO_CRLF, "\r\n", "\r\n"); - testCheckin(TEXT_CRLF, AUTO_CRLF, "\n\r", "\r\n\r"); + testCheckin(TEXT_CRLF, null, "\n\r", "\r\n\r"); + testCheckin(null, AUTO_CRLF, "\n\r", "\n\r"); // Lone CR testCheckin(TEXT_CRLF, 
AUTO_CRLF, "\n\r\n", "\r\n\r\n"); testCheckin(TEXT_CRLF, AUTO_CRLF, "\r\n\r", "\r\n\r"); testCheckin(TEXT_CRLF, AUTO_CRLF, "a\nb\n", "a\r\nb\r\n"); testCheckin(TEXT_CRLF, AUTO_CRLF, "a\rb\r", "a\rb\r"); - testCheckin(TEXT_CRLF, AUTO_CRLF, "a\n\rb\n\r", "a\r\n\rb\r\n\r"); + testCheckin(TEXT_CRLF, null, "a\n\rb\n\r", "a\r\n\rb\r\n\r"); + testCheckin(null, AUTO_CRLF, "a\n\rb\n\r", "a\n\rb\n\r"); // Lone CR testCheckin(TEXT_CRLF, AUTO_CRLF, "a\r\nb\r\n", "a\r\nb\r\n"); } @@ -257,47 +262,55 @@ public class EolStreamTypeUtilTest { byte[] inputBytes = input.getBytes(UTF_8); byte[] expectedConversionBytes = expectedConversion.getBytes(UTF_8); - // test using input text and assuming it was declared TEXT - try (InputStream in = EolStreamTypeUtil.wrapInputStream( - new ByteArrayInputStream(inputBytes), - streamTypeText)) { - byte[] b = new byte[1024]; - int len = IO.readFully(in, b, 0); - assertArrayEquals(expectedConversionBytes, Arrays.copyOf(b, len)); + if (streamTypeText != null) { + // test using input text and assuming it was declared TEXT + try (InputStream in = EolStreamTypeUtil.wrapInputStream( + new ByteArrayInputStream(inputBytes), streamTypeText)) { + byte[] b = new byte[1024]; + int len = IO.readFully(in, b, 0); + assertArrayEquals(expectedConversionBytes, + Arrays.copyOf(b, len)); + } } - // test using input text and assuming it was declared AUTO, using binary - // detection - try (InputStream in = EolStreamTypeUtil.wrapInputStream( - new ByteArrayInputStream(inputBytes), - streamTypeWithBinaryCheck)) { - byte[] b = new byte[1024]; - int len = IO.readFully(in, b, 0); - assertArrayEquals(expectedConversionBytes, Arrays.copyOf(b, len)); + if (streamTypeWithBinaryCheck != null) { + // test using input text and assuming it was declared AUTO, using + // binary detection + try (InputStream in = EolStreamTypeUtil.wrapInputStream( + new ByteArrayInputStream(inputBytes), + streamTypeWithBinaryCheck)) { + byte[] b = new byte[1024]; + int len = IO.readFully(in, b, 0); + 
assertArrayEquals(expectedConversionBytes, + Arrays.copyOf(b, len)); + } } - // now pollute input text with some binary bytes inputBytes = extendWithBinaryData(inputBytes); expectedConversionBytes = extendWithBinaryData(expectedConversionBytes); - // again, test using input text and assuming it was declared TEXT - try (InputStream in = EolStreamTypeUtil.wrapInputStream( - new ByteArrayInputStream(inputBytes), streamTypeText)) { - byte[] b = new byte[1024]; - int len = IO.readFully(in, b, 0); - assertArrayEquals(expectedConversionBytes, Arrays.copyOf(b, len)); + if (streamTypeText != null) { + // again, test using input text and assuming it was declared TEXT + try (InputStream in = EolStreamTypeUtil.wrapInputStream( + new ByteArrayInputStream(inputBytes), streamTypeText)) { + byte[] b = new byte[1024]; + int len = IO.readFully(in, b, 0); + assertArrayEquals(expectedConversionBytes, + Arrays.copyOf(b, len)); + } } - // again, test using input text and assuming it was declared AUTO, using - // binary - // detection - try (InputStream in = EolStreamTypeUtil.wrapInputStream( - new ByteArrayInputStream(inputBytes), - streamTypeWithBinaryCheck)) { - byte[] b = new byte[1024]; - int len = IO.readFully(in, b, 0); - // expect no conversion - assertArrayEquals(inputBytes, Arrays.copyOf(b, len)); + if (streamTypeWithBinaryCheck != null) { + // again, test using input text and assuming it was declared AUTO, + // using binary detection + try (InputStream in = EolStreamTypeUtil.wrapInputStream( + new ByteArrayInputStream(inputBytes), + streamTypeWithBinaryCheck)) { + byte[] b = new byte[1024]; + int len = IO.readFully(in, b, 0); + // expect no conversion + assertArrayEquals(inputBytes, Arrays.copyOf(b, len)); + } } } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/AutoCRLFInputStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/AutoCRLFInputStreamTest.java index cd4e503390..94429924b0 100644 --- 
a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/AutoCRLFInputStreamTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/AutoCRLFInputStreamTest.java @@ -30,10 +30,10 @@ public class AutoCRLFInputStreamTest { assertNoCrLf("\r\n", "\n"); assertNoCrLf("\r\n", "\r\n"); assertNoCrLf("\r\r", "\r\r"); - assertNoCrLf("\r\n\r", "\n\r"); + assertNoCrLf("\n\r", "\n\r"); // Lone CR assertNoCrLf("\r\n\r\r", "\r\n\r\r"); assertNoCrLf("\r\n\r\n", "\r\n\r\n"); - assertNoCrLf("\r\n\r\n\r", "\n\r\n\r"); + assertNoCrLf("\n\r\n\r", "\n\r\n\r"); // Lone CR assertNoCrLf("\0\n", "\0\n"); } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/AutoCRLFOutputStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/AutoCRLFOutputStreamTest.java index 150df08453..791727f734 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/AutoCRLFOutputStreamTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/AutoCRLFOutputStreamTest.java @@ -32,7 +32,7 @@ public class AutoCRLFOutputStreamTest { assertNoCrLf("\r\n", "\n"); assertNoCrLf("\r\n", "\r\n"); assertNoCrLf("\r\r", "\r\r"); - assertNoCrLf("\r\n\r", "\n\r"); + assertNoCrLf("\n\r", "\n\r"); // Lone CR assertNoCrLf("\r\n\r\r", "\r\n\r\r"); assertNoCrLf("\r\n\r\n", "\r\n\r\n"); assertNoCrLf("\n\r\n\r", "\n\r\n\r"); |