diff options
author | Thomas Wolf <thomas.wolf@paranor.ch> | 2021-03-10 14:25:37 +0100 |
---|---|---|
committer | Matthias Sohn <matthias.sohn@sap.com> | 2021-05-26 00:38:00 +0200 |
commit | 76b76a6048b9b3dbca965463d4f6f5732ffb784d (patch) | |
tree | 3726b3ccdaed12b93b6a50408bbf8006b72888fb /org.eclipse.jgit.test | |
parent | 10ac4499115965ff10e547a0632c89873a06cf91 (diff) | |
download | jgit-76b76a6048b9b3dbca965463d4f6f5732ffb784d.tar.gz jgit-76b76a6048b9b3dbca965463d4f6f5732ffb784d.zip |
ApplyCommand: use byte arrays for text patches, not strings
Instead of converting the patch bytes to strings, apply the patch at the
byte level, like C git does. Converting the input lines and the hunk
lines from bytes to strings and then applying the patch based on
strings may give surprising results if a patch converts a text file
from one encoding to another. Moreover, in the end we don't know which
encoding to use to write the result.
Previous code just wrote the result as UTF-8, which forcibly changed
the encoding if the original input had some other encoding (even if the
patch had the same non-UTF-8 encoding). It was also wrong if the input
was UTF-8 and the patch should have changed the encoding to something
else.
So use ByteBuffers instead of Strings. This has the additional advantage
that all these ByteBuffers can share the underlying byte arrays of the
input and of the patch, so it also reduces memory consumption.
Change-Id: I450975f2ba0e7d0bec8973e3113cc2e7aea187ee
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Diffstat (limited to 'org.eclipse.jgit.test')
5 files changed, 19 insertions, 1 deletion
diff --git a/org.eclipse.jgit.test/.settings/org.eclipse.core.resources.prefs b/org.eclipse.jgit.test/.settings/org.eclipse.core.resources.prefs index 6a9621db1d..cddb99d1d5 100644 --- a/org.eclipse.jgit.test/.settings/org.eclipse.core.resources.prefs +++ b/org.eclipse.jgit.test/.settings/org.eclipse.core.resources.prefs @@ -1,5 +1,6 @@ -#Sat Dec 20 21:21:24 CET 2008 eclipse.preferences.version=1 +encoding//tst-rsrc/org/eclipse/jgit/diff/umlaut.patch=ISO-8859-1 +encoding//tst-rsrc/org/eclipse/jgit/diff/umlaut_PostImage=ISO-8859-1 encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_BothISO88591.patch=ISO-8859-1 encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_Convert.patch=ISO-8859-1 encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_DiffCc.patch=ISO-8859-1 diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut.patch b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut.patch new file mode 100644 index 0000000000..7380dbed82 --- /dev/null +++ b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut.patch @@ -0,0 +1,7 @@ +diff --git a/umlaut b/umlaut +index 003a054..557f72f 100644 +--- a/umlaut ++++ b/umlaut +@@ -1 +1 @@ +-ÄÖÜ ++ÄÖÜ diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PostImage b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PostImage new file mode 100644 index 0000000000..557f72f513 --- /dev/null +++ b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PostImage @@ -0,0 +1 @@ +ÄÖÜ diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PreImage b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PreImage new file mode 100644 index 0000000000..003a054114 --- /dev/null +++ b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PreImage @@ -0,0 +1 @@ +ÄÖÜ diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ApplyCommandTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ApplyCommandTest.java index e9b8924e6f..b997ac009a 
100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ApplyCommandTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ApplyCommandTest.java @@ -288,6 +288,14 @@ public class ApplyCommandTest extends RepositoryTestCase { } @Test + public void testEncodingChange() throws Exception { + // This is a text patch that changes a file containing ÄÖÜ in UTF-8 to + // the same characters in ISO-8859-1. The patch file itself uses mixed + // encoding. Since checkFile() works with strings use the binary check. + checkBinary("umlaut", true); + } + + @Test public void testAddA1() throws Exception { ApplyResult result = init("A1", false, true); assertEquals(1, result.getUpdatedFiles().size()); |