aboutsummaryrefslogtreecommitdiffstats
path: root/org.eclipse.jgit/src/org/eclipse/jgit/diff
diff options
context:
space:
mode:
author Thomas Wolf <thomas.wolf@paranor.ch> 2021-03-10 14:25:37 +0100
committer Matthias Sohn <matthias.sohn@sap.com> 2021-05-26 00:38:00 +0200
commit 76b76a6048b9b3dbca965463d4f6f5732ffb784d (patch)
tree 3726b3ccdaed12b93b6a50408bbf8006b72888fb /org.eclipse.jgit/src/org/eclipse/jgit/diff
parent 10ac4499115965ff10e547a0632c89873a06cf91 (diff)
download jgit-76b76a6048b9b3dbca965463d4f6f5732ffb784d.tar.gz
jgit-76b76a6048b9b3dbca965463d4f6f5732ffb784d.zip
ApplyCommand: use byte arrays for text patches, not strings
Instead of converting the patch bytes to strings, apply the patch at the byte level, as C git does. Converting the input lines and the hunk lines from bytes to strings and then applying the patch based on strings may give surprising results if a patch converts a text file from one encoding to another. Moreover, in the end we don't know which encoding to use to write the result. Previous code just wrote the result as UTF-8, which forcibly changed the encoding if the original input had some other encoding (even if the patch had the same non-UTF-8 encoding). It was also wrong if the input was UTF-8 and the patch should have changed the encoding to something else. So use ByteBuffers instead of Strings. This has the additional advantage that all these ByteBuffers can share the underlying byte arrays of the input and of the patch, so it also reduces memory consumption. Change-Id: I450975f2ba0e7d0bec8973e3113cc2e7aea187ee Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/diff')
-rw-r--r-- org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java 24
1 files changed, 23 insertions, 1 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
index 9f4b1fa493..d09da019dd 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2009, Google Inc.
- * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de> and others
+ * Copyright (C) 2008-2021, Johannes E. Schindelin <johannes.schindelin@gmx.de> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
@@ -16,6 +16,7 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.nio.ByteBuffer;
import org.eclipse.jgit.errors.BinaryBlobException;
import org.eclipse.jgit.errors.LargeObjectException;
@@ -165,6 +166,27 @@ public class RawText extends Sequence {
}
/**
+ * Get the raw text for a single line.
+ *
+ * @param i
+ * index of the line to extract. Note this is 0-based, so line
+ * number 1 is actually index 0.
+ * @return the text for the line, without a trailing LF, as a
+ * {@link ByteBuffer} that is backed by a slice of the
+ * {@link #getRawContent() raw content}, with the buffer's position
+ * on the start of the line and the limit at the end.
+ * @since 5.12
+ */
+ public ByteBuffer getRawString(int i) {
+ int s = getStart(i); // presumably the offset in content where line i begins -- confirm against getStart
+ int e = getEnd(i); // presumably the exclusive end offset of line i, LF included -- confirm against getEnd
+ if (e > 0 && content[e - 1] == '\n') { // byte just before e is an LF: leave it out of the result
+ e--;
+ }
+ return ByteBuffer.wrap(content, s, e - s); // no copy: buffer is backed by content, position = s, limit = e
+ }
+
+ /**
* Get the text for a region of lines.
*
* @param begin