diff options
author | Thomas Wolf <thomas.wolf@paranor.ch> | 2020-07-22 00:51:24 +0200 |
---|---|---|
committer | Thomas Wolf <thomas.wolf@paranor.ch> | 2020-08-17 08:52:55 +0200 |
commit | efd1cc05af7e59a24763dfedb7fc44cda151be50 (patch) | |
tree | 203d3bcf409a1df3229607a89d5901ca52630b3f /org.eclipse.jgit | |
parent | 71aeedb6ec79a91e41251a88c1ab3235c40a9b70 (diff) | |
download | jgit-efd1cc05af7e59a24763dfedb7fc44cda151be50.tar.gz jgit-efd1cc05af7e59a24763dfedb7fc44cda151be50.zip |
Keep line endings for text files committed with CR/LF on text=auto
Git never converts line endings if the version in the repository is a
text file with CR/LF and text=auto. See [1]: "When the file has been
committed with CRLF, no conversion is done."
Because the sentence just before is about converting line endings on
check-in, in commit 60cf85a [2] I had understood that to mean that no
conversion on check-in was to be done. However, as bug 565048 and a
code inspection of the C git code showed, it really means no conversion
is done on check-in *or check-out*.
If the text attribute is not set but core.autocrlf = true, this is
the same as text=auto eol=crlf. C git does not convert on check-out
even on text=auto eol=lf if the index version is a text file with
CR/LF.
For check-in, one has to look at the intended target, which is done
in WorkingTreeIterator since commit 60cf85a. For check-out, it can
be done by looking at the source and can thus be done in the
AutoLFOutputStream.
Additionally, provide a constructor for AutoLFInputStream to do
the same, for cases where the equivalent of a check-out is done via
an input stream obtained from a blob. (EGit does that in its
GitBlobStorage for the Eclipse compare framework; it's more efficient
than using a TemporaryBuffer and DirCacheCheckout.getContent(), and
it avoids the need for a temporary file.)
Adapt existing tests, and add new checkout and merge tests to verify
the resulting files have the correct line endings.
EGit's GitBlobStorage will need to call the new version of
EolStreamTypeUtil.wrapInputStream().
[1] https://git-scm.com/docs/gitattributes#Documentation/gitattributes.txt-Settostringvalueauto
[2] https://git.eclipse.org/r/c/jgit/jgit/+/127324
Bug: 565048
Change-Id: If1282ef43e2abd00263541bd10a01fe1f5c619fc
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Diffstat (limited to 'org.eclipse.jgit')
3 files changed, 157 insertions, 66 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java index 8c9b1bf5cc..0e335a9dc4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java @@ -1,6 +1,6 @@ /* * Copyright (C) 2010, 2013 Marc Strapetz <marc.strapetz@syntevo.com> - * Copyright (C) 2015, Ivan Motsch <ivan.motsch@bsiag.com> and others + * Copyright (C) 2015, 2020 Ivan Motsch <ivan.motsch@bsiag.com> and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at @@ -13,26 +13,58 @@ package org.eclipse.jgit.util.io; import java.io.IOException; import java.io.InputStream; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.Set; import org.eclipse.jgit.diff.RawText; /** * An InputStream that normalizes CRLF to LF. - * - * Existing single CR are not changed to LF, but retained as is. - * - * Optionally, a binary check on the first 8000 bytes is performed and in case - * of binary files, canonicalization is turned off (for the complete file). * <p> - * This is the former EolCanonicalizingInputStream with a new name in order to - * have same naming for all LF / CRLF streams + * Existing single CR are not changed to LF but are retained as is. + * </p> + * <p> + * Optionally, a binary check on the first 8kB is performed and in case of + * binary files, canonicalization is turned off (for the complete file). If + * binary checking determines that the input is CR/LF-delimited text and the + * stream has been created for checkout, canonicalization is also turned off. + * </p> * * @since 4.3 */ public class AutoLFInputStream extends InputStream { + + // This is the former EolCanonicalizingInputStream with a new name in order + // to have same naming for all LF / CRLF streams. 
+ + /** + * Flags for controlling auto-detection of binary vs. text content (for + * text=auto). + * + * @since 5.9 + */ + public enum StreamFlag { + /** + * Check the first 8kB for binary content and switch off + * canonicalization off for the whole file if so. + */ + DETECT_BINARY, + /** + * If {@link #DETECT_BINARY} is set, throw an {@link IsBinaryException} + * if binary content is detected. + */ + ABORT_IF_BINARY, + /** + * If {@link #DETECT_BINARY} is set and content is found to be CR-LF + * delimited text, switch off canonicalization. + */ + FOR_CHECKOUT + } + private final byte[] single = new byte[1]; - private final byte[] buf = new byte[8096]; + private final byte[] buf = new byte[8 * 1024]; private final InputStream in; @@ -40,11 +72,23 @@ public class AutoLFInputStream extends InputStream { private int ptr; + /** + * Set to {@code true} if no CR/LF processing is to be done: if the input is + * binary data, or CR/LF-delimited text and {@link StreamFlag#FOR_CHECKOUT} + * was given. + */ + private boolean passAsIs; + + /** + * Set to {@code true} if the input was detected to be binary data. + */ private boolean isBinary; private boolean detectBinary; - private boolean abortIfBinary; + private final boolean abortIfBinary; + + private final boolean forCheckout; /** * A special exception thrown when {@link AutoLFInputStream} is told to @@ -62,20 +106,64 @@ public class AutoLFInputStream extends InputStream { } /** - * Creates a new InputStream, wrapping the specified stream + * Factory method for creating an {@link AutoLFInputStream} with the + * specified {@link StreamFlag flags}. + * + * @param in + * raw input stream + * @param flags + * {@link StreamFlag}s controlling the stream behavior + * @return a new {@link AutoLFInputStream} + * @since 5.9 + */ + public static AutoLFInputStream create(InputStream in, + StreamFlag... 
flags) { + if (flags == null) { + return new AutoLFInputStream(in, null); + } + EnumSet<StreamFlag> set = EnumSet.noneOf(StreamFlag.class); + set.addAll(Arrays.asList(flags)); + return new AutoLFInputStream(in, set); + } + + /** + * Creates a new InputStream, wrapping the specified stream. + * + * @param in + * raw input stream + * @param flags + * {@link StreamFlag}s controlling the stream behavior; + * {@code null} is treated as an empty set + * @since 5.9 + */ + public AutoLFInputStream(InputStream in, Set<StreamFlag> flags) { + this.in = in; + this.detectBinary = flags != null + && flags.contains(StreamFlag.DETECT_BINARY); + this.abortIfBinary = flags != null + && flags.contains(StreamFlag.ABORT_IF_BINARY); + this.forCheckout = flags != null + && flags.contains(StreamFlag.FOR_CHECKOUT); + } + + /** + * Creates a new InputStream, wrapping the specified stream. * * @param in * raw input stream * @param detectBinary * whether binaries should be detected * @since 2.0 + * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)} + * instead */ + @Deprecated public AutoLFInputStream(InputStream in, boolean detectBinary) { this(in, detectBinary, false); } /** - * Creates a new InputStream, wrapping the specified stream + * Creates a new InputStream, wrapping the specified stream. 
* * @param in * raw input stream @@ -84,12 +172,16 @@ public class AutoLFInputStream extends InputStream { * @param abortIfBinary * throw an IOException if the file is binary * @since 3.3 + * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)} + * instead */ + @Deprecated public AutoLFInputStream(InputStream in, boolean detectBinary, boolean abortIfBinary) { this.in = in; this.detectBinary = detectBinary; this.abortIfBinary = abortIfBinary; + this.forCheckout = false; } /** {@inheritDoc} */ @@ -118,7 +210,7 @@ public class AutoLFInputStream extends InputStream { } byte b = buf[ptr++]; - if (isBinary || b != '\r') { + if (passAsIs || b != '\r') { // Logic for binary files ends here bs[i++] = b; continue; @@ -170,9 +262,14 @@ public class AutoLFInputStream extends InputStream { } if (detectBinary) { isBinary = RawText.isBinary(buf, cnt); + passAsIs = isBinary; detectBinary = false; - if (isBinary && abortIfBinary) + if (isBinary && abortIfBinary) { throw new IsBinaryException(); + } + if (!passAsIs && forCheckout) { + passAsIs = RawText.isCrLfText(buf, cnt); + } } ptr = 0; return true; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java index e235aa0ed4..195fdb4213 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFOutputStream.java @@ -1,43 +1,12 @@ /* * Copyright (C) 2015, Ivan Motsch <ivan.motsch@bsiag.com> + * Copyright (C) 2020, Thomas Wolf <thomas.wolf@paranor.ch> and others * - * This program and the accompanying materials are made available - * under the terms of the Eclipse Distribution License v1.0 which - * accompanies this distribution, is reproduced below, and is - * available at http://www.eclipse.org/org/documents/edl-v10.php + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution 
License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * - Neither the name of the Eclipse Foundation, Inc. nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.util.io; @@ -49,11 +18,15 @@ import org.eclipse.jgit.diff.RawText; /** * An OutputStream that reduces CRLF to LF. - * + * <p> * Existing single CR are not changed to LF, but retained as is. 
- * + * </p> + * <p> * A binary check on the first 8000 bytes is performed and in case of binary - * files, canonicalization is turned off (for the complete file). + * files, canonicalization is turned off (for the complete file). If the binary + * check determines that the input is not binary but text with CR/LF, + * canonicalization is also turned off. + * </p> * * @since 4.3 */ @@ -76,9 +49,7 @@ public class AutoLFOutputStream extends OutputStream { private boolean isBinary; /** - * <p> * Constructor for AutoLFOutputStream. - * </p> * * @param out * an {@link java.io.OutputStream} object. @@ -88,9 +59,7 @@ public class AutoLFOutputStream extends OutputStream { } /** - * <p> * Constructor for AutoLFOutputStream. - * </p> * * @param out * an {@link java.io.OutputStream} object. @@ -123,14 +92,11 @@ public class AutoLFOutputStream extends OutputStream { public void write(byte[] b, int startOff, int startLen) throws IOException { final int overflow = buffer(b, startOff, startLen); - if (overflow < 0) { + if (overflow <= 0) { return; } final int off = startOff + startLen - overflow; final int len = overflow; - if (len == 0) { - return; - } int lastw = off; if (isBinary) { out.write(b, off, len); @@ -190,6 +156,9 @@ public class AutoLFOutputStream extends OutputStream { private void decideMode() throws IOException { if (detectBinary) { isBinary = RawText.isBinary(binbuf, binbufcnt); + if (!isBinary) { + isBinary = RawText.isCrLfText(binbuf, binbufcnt); + } detectBinary = false; } int cachedLen = binbufcnt; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolStreamTypeUtil.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolStreamTypeUtil.java index c33c869b64..88ee2aee88 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolStreamTypeUtil.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolStreamTypeUtil.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015, Ivan Motsch <ivan.motsch@bsiag.com> and others + * Copyright (C) 2015, 2020 Ivan Motsch 
<ivan.motsch@bsiag.com> and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at @@ -12,12 +12,14 @@ package org.eclipse.jgit.util.io; import java.io.InputStream; import java.io.OutputStream; +import java.util.EnumSet; import org.eclipse.jgit.attributes.Attributes; import org.eclipse.jgit.lib.CoreConfig.EolStreamType; import org.eclipse.jgit.treewalk.TreeWalk.OperationType; import org.eclipse.jgit.treewalk.WorkingTreeOptions; import org.eclipse.jgit.util.SystemReader; +import org.eclipse.jgit.util.io.AutoLFInputStream.StreamFlag; /** * Utility used to create input and output stream wrappers for @@ -71,7 +73,7 @@ public final class EolStreamTypeUtil { /** * Wrap the input stream depending on - * {@link org.eclipse.jgit.lib.CoreConfig.EolStreamType} + * {@link org.eclipse.jgit.lib.CoreConfig.EolStreamType}. * * @param in * original stream @@ -82,15 +84,38 @@ public final class EolStreamTypeUtil { */ public static InputStream wrapInputStream(InputStream in, EolStreamType conversion) { + return wrapInputStream(in, conversion, false); + } + + /** + * Wrap the input stream depending on + * {@link org.eclipse.jgit.lib.CoreConfig.EolStreamType}. + * + * @param in + * original stream + * @param conversion + * to be performed + * @param forCheckout + * whether the stream is for checking out from the repository + * @return the converted stream depending on + * {@link org.eclipse.jgit.lib.CoreConfig.EolStreamType} + * @since 5.9 + */ + public static InputStream wrapInputStream(InputStream in, + EolStreamType conversion, boolean forCheckout) { switch (conversion) { case TEXT_CRLF: return new AutoCRLFInputStream(in, false); case TEXT_LF: - return new AutoLFInputStream(in, false); + return AutoLFInputStream.create(in); case AUTO_CRLF: return new AutoCRLFInputStream(in, true); case AUTO_LF: - return new AutoLFInputStream(in, true); + EnumSet<StreamFlag> flags = forCheckout + ? 
EnumSet.of(StreamFlag.DETECT_BINARY, + StreamFlag.FOR_CHECKOUT) + : EnumSet.of(StreamFlag.DETECT_BINARY); + return new AutoLFInputStream(in, flags); default: return in; } @@ -98,7 +123,7 @@ public final class EolStreamTypeUtil { /** * Wrap the output stream depending on - * {@link org.eclipse.jgit.lib.CoreConfig.EolStreamType} + * {@link org.eclipse.jgit.lib.CoreConfig.EolStreamType}. * * @param out * original stream |