]> source.dussan.org Git - jgit.git/commitdiff
Fix core.autocrlf for non-normalized index 24/127324/11
authorThomas Wolf <thomas.wolf@paranor.ch>
Sun, 12 Aug 2018 17:30:36 +0000 (19:30 +0200)
committerDavid Pursehouse <david.pursehouse@gmail.com>
Tue, 26 Feb 2019 07:18:27 +0000 (16:18 +0900)
With text=auto or core.autocrlf=true, git does not normalize upon
check-in if the file in the index contains already CR/LFs. The
documentation says: "When text is set to "auto", the path is
marked for automatic end-of-line conversion. If Git decides that
the content is text, its line endings are converted to LF on
checkin. When the file has been committed with CRLF, no conversion
is done."[1]

Implement the last bit as in canonical git: check the blob in the
index for CR/LFs. For very large files, we check only the first 8000
bytes, like RawText.isBinary() and AutoLFInputStream do.

In Auto(CR)LFInputStream, ensure that the buffer is filled as much as
possible for the isBinary() check.

Regarding these content checks, there are a number of inconsistencies:

* Canonical git considers files containing lone CRs as binary.
* RawText checks the first 8000 bytes.
* Auto(CR)LFInputStream checks the first 8096 (not 8192!) bytes.

None of these are changed with this commit. It appears that canonical
git will check the whole blob, not just the first 8k bytes. Also
note: the check for CR/LF text won't work with LFS (neither in JGit
nor in git) since the blob data is not run through the smudge filter.
C.f. [2].

Two tests in AddCommandTest actually tested that normalization was
done even if the file was already committed with CR/LF.These tests
had to be adapted. I find the git documentation unclear about the
case where core.autocrlf=input, but from [3] it looks as if this
non-normalization also applies in this case.

Add new tests in CommitCommandTest testing this for the case where
the index entry is for a merge conflict. In this case, canonical git
uses the "ours" version.[4] Do the same.

[1] https://git-scm.com/docs/gitattributes
[2] https://github.com/git/git/blob/3434569fc/convert.c#L225
[3] https://github.com/git/git/blob/3434569fc/convert.c#L529
[4] https://github.com/git/git/blob/f2b6aa98b/read-cache.c#L3281

Bug: 470643
Change-Id: Ie7310539fbe6c737d78b1dcc29e34735d4616b88
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
org.eclipse.jgit.test/tst/org/eclipse/jgit/api/AddCommandTest.java
org.eclipse.jgit.test/tst/org/eclipse/jgit/api/CommitCommandTest.java
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java
org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoCRLFInputStream.java
org.eclipse.jgit/src/org/eclipse/jgit/util/io/AutoLFInputStream.java

index 1a5793ce31d6f1ca3d08e50af685731f04596313..3fee51a885250cab92a7c15902d6d19d358452fd 100644 (file)
@@ -501,11 +501,11 @@ public class AddCommandTest extends RepositoryTestCase {
                                        indexState(CONTENT));
                        db.getConfig().setString("core", null, "autocrlf", "true");
                        git.add().addFilepattern("a.txt").call();
-                       assertEquals("[a.txt, mode:100644, content:row1\nrow2]",
+                       assertEquals("[a.txt, mode:100644, content:row1\r\nrow2]",
                                        indexState(CONTENT));
                        db.getConfig().setString("core", null, "autocrlf", "input");
                        git.add().addFilepattern("a.txt").call();
-                       assertEquals("[a.txt, mode:100644, content:row1\nrow2]",
+                       assertEquals("[a.txt, mode:100644, content:row1\r\nrow2]",
                                        indexState(CONTENT));
                }
        }
@@ -523,19 +523,18 @@ public class AddCommandTest extends RepositoryTestCase {
                try (PrintWriter writer = new PrintWriter(file, UTF_8.name())) {
                        writer.print(crData);
                }
-               String lfData = data.toString().replaceAll("\r", "");
                try (Git git = new Git(db)) {
                        db.getConfig().setString("core", null, "autocrlf", "false");
                        git.add().addFilepattern("a.txt").call();
-                       assertEquals("[a.txt, mode:100644, content:" + data + "]",
+                       assertEquals("[a.txt, mode:100644, content:" + crData + "]",
                                        indexState(CONTENT));
                        db.getConfig().setString("core", null, "autocrlf", "true");
                        git.add().addFilepattern("a.txt").call();
-                       assertEquals("[a.txt, mode:100644, content:" + lfData + "]",
+                       assertEquals("[a.txt, mode:100644, content:" + crData + "]",
                                        indexState(CONTENT));
                        db.getConfig().setString("core", null, "autocrlf", "input");
                        git.add().addFilepattern("a.txt").call();
-                       assertEquals("[a.txt, mode:100644, content:" + lfData + "]",
+                       assertEquals("[a.txt, mode:100644, content:" + crData + "]",
                                        indexState(CONTENT));
                }
        }
index b76d8f987b8d50d383ca87f550e1bc693e1edf72..cd96f41a4205b21336813e6b768a5868c3495220 100644 (file)
@@ -56,6 +56,7 @@ import java.util.List;
 import java.util.TimeZone;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import org.eclipse.jgit.api.CherryPickResult.CherryPickStatus;
 import org.eclipse.jgit.api.errors.CanceledException;
 import org.eclipse.jgit.api.errors.EmptyCommitException;
 import org.eclipse.jgit.api.errors.WrongRepositoryStateException;
@@ -77,6 +78,7 @@ import org.eclipse.jgit.lib.ReflogEntry;
 import org.eclipse.jgit.lib.Repository;
 import org.eclipse.jgit.lib.StoredConfig;
 import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.storage.file.FileBasedConfig;
 import org.eclipse.jgit.submodule.SubmoduleWalk;
 import org.eclipse.jgit.transport.CredentialsProvider;
 import org.eclipse.jgit.treewalk.TreeWalk;
@@ -620,9 +622,100 @@ public class CommitCommandTest extends RepositoryTestCase {
                        writeTrashFile(".gitignore", "bar");
                        git.add().addFilepattern("subdir").call();
                        git.commit().setOnly("subdir").setMessage("first commit").call();
+                       assertEquals("[subdir/foo, mode:100644, content:Hello World]",
+                                       indexState(CONTENT));
                }
        }
 
+       @Test
+       public void commitWithAutoCrlfAndNonNormalizedIndex() throws Exception {
+               try (Git git = new Git(db)) {
+                       // Commit a file with CR/LF into the index
+                       FileBasedConfig config = db.getConfig();
+                       config.setString("core", null, "autocrlf", "false");
+                       config.save();
+                       writeTrashFile("file.txt", "line 1\r\nline 2\r\n");
+                       git.add().addFilepattern("file.txt").call();
+                       git.commit().setMessage("Initial").call();
+                       assertEquals(
+                                       "[file.txt, mode:100644, content:line 1\r\nline 2\r\n]",
+                                       indexState(CONTENT));
+                       config.setString("core", null, "autocrlf", "true");
+                       config.save();
+                       writeTrashFile("file.txt", "line 1\r\nline 1.5\r\nline 2\r\n");
+                       writeTrashFile("file2.txt", "new\r\nfile\r\n");
+                       git.add().addFilepattern("file.txt").addFilepattern("file2.txt")
+                                       .call();
+                       git.commit().setMessage("Second").call();
+                       assertEquals(
+                                       "[file.txt, mode:100644, content:line 1\r\nline 1.5\r\nline 2\r\n]"
+                                                       + "[file2.txt, mode:100644, content:new\nfile\n]",
+                                       indexState(CONTENT));
+                       writeTrashFile("file2.txt", "new\r\nfile\r\ncontent\r\n");
+                       git.add().addFilepattern("file2.txt").call();
+                       git.commit().setMessage("Third").call();
+                       assertEquals(
+                                       "[file.txt, mode:100644, content:line 1\r\nline 1.5\r\nline 2\r\n]"
+                                                       + "[file2.txt, mode:100644, content:new\nfile\ncontent\n]",
+                                       indexState(CONTENT));
+               }
+       }
+
+       private void testConflictWithAutoCrlf(String baseLf, String lf)
+                       throws Exception {
+               try (Git git = new Git(db)) {
+                       // Commit a file with CR/LF into the index
+                       FileBasedConfig config = db.getConfig();
+                       config.setString("core", null, "autocrlf", "false");
+                       config.save();
+                       writeTrashFile("file.txt", "foo" + baseLf);
+                       git.add().addFilepattern("file.txt").call();
+                       git.commit().setMessage("Initial").call();
+                       // Switch to side branch
+                       git.checkout().setCreateBranch(true).setName("side").call();
+                       writeTrashFile("file.txt", "bar\r\n");
+                       git.add().addFilepattern("file.txt").call();
+                       RevCommit side = git.commit().setMessage("Side").call();
+                       // Switch back to master and commit a conflict with the given lf
+                       git.checkout().setName("master");
+                       writeTrashFile("file.txt", "foob" + lf);
+                       git.add().addFilepattern("file.txt").call();
+                       git.commit().setMessage("Second").call();
+                       // Switch on autocrlf=true
+                       config.setString("core", null, "autocrlf", "true");
+                       config.save();
+                       // Cherry pick side: conflict. Resolve with CR-LF and commit.
+                       CherryPickResult pick = git.cherryPick().include(side).call();
+                       assertEquals("Expected a cherry-pick conflict",
+                                       CherryPickStatus.CONFLICTING, pick.getStatus());
+                       writeTrashFile("file.txt", "foobar\r\n");
+                       git.add().addFilepattern("file.txt").call();
+                       git.commit().setMessage("Second").call();
+                       assertEquals("[file.txt, mode:100644, content:foobar" + lf + "]",
+                                       indexState(CONTENT));
+               }
+       }
+
+       @Test
+       public void commitConflictWithAutoCrlfBaseCrLfOursLf() throws Exception {
+               testConflictWithAutoCrlf("\r\n", "\n");
+       }
+
+       @Test
+       public void commitConflictWithAutoCrlfBaseLfOursLf() throws Exception {
+               testConflictWithAutoCrlf("\n", "\n");
+       }
+
+       @Test
+       public void commitConflictWithAutoCrlfBasCrLfOursCrLf() throws Exception {
+               testConflictWithAutoCrlf("\r\n", "\r\n");
+       }
+
+       @Test
+       public void commitConflictWithAutoCrlfBaseLfOursCrLf() throws Exception {
+               testConflictWithAutoCrlf("\n", "\r\n");
+       }
+
        private static void addUnmergedEntry(String file, DirCacheBuilder builder) {
                DirCacheEntry stage1 = new DirCacheEntry(file, DirCacheEntry.STAGE_1);
                DirCacheEntry stage2 = new DirCacheEntry(file, DirCacheEntry.STAGE_2);
index 178d62072df24fca6e07476b88f0394db77f5f5f..5333451a96258b702600533930630853397f6033 100644 (file)
@@ -78,6 +78,36 @@ public class RawTextTest {
                assertEquals("f\0o-b", a.getString(1, 2, true));
        }
 
+       @Test
+       public void testCrLfTextYes() {
+               assertTrue(RawText
+                               .isCrLfText(Constants.encodeASCII("line 1\r\nline 2\r\n")));
+       }
+
+       @Test
+       public void testCrLfTextNo() {
+               assertFalse(
+                               RawText.isCrLfText(Constants.encodeASCII("line 1\nline 2\n")));
+       }
+
+       @Test
+       public void testCrLfTextBinary() {
+               assertFalse(RawText
+                               .isCrLfText(Constants.encodeASCII("line 1\r\nline\0 2\r\n")));
+       }
+
+       @Test
+       public void testCrLfTextMixed() {
+               assertTrue(RawText
+                               .isCrLfText(Constants.encodeASCII("line 1\nline 2\r\n")));
+       }
+
+       @Test
+       public void testCrLfTextCutShort() {
+               assertFalse(
+                               RawText.isCrLfText(Constants.encodeASCII("line 1\nline 2\r")));
+       }
+
        @Test
        public void testEquals() {
                final RawText a = new RawText(Constants.encodeASCII("foo-a\nfoo-b\n"));
index bd41d906803f0cd957012925bfb1c0c61278cbd0..6c0d90ebad356e32b1410f240905d658e5e094db 100644 (file)
@@ -308,6 +308,74 @@ public class RawText extends Sequence {
                return false;
        }
 
+       /**
+        * Determine heuristically whether a byte array represents text content
+        * using CR-LF as line separator.
+        *
+        * @param raw
+        *            the raw file content.
+        * @return {@code true} if raw is likely to be CR-LF delimited text,
+        *         {@code false} otherwise
+        * @since 5.3
+        */
+       public static boolean isCrLfText(byte[] raw) {
+               return isCrLfText(raw, raw.length);
+       }
+
+       /**
+        * Determine heuristically whether the bytes contained in a stream represent
+        * text content using CR-LF as line separator.
+        *
+        * Note: Do not further use this stream after having called this method! The
+        * stream may not be fully read and will be left at an unknown position
+        * after consuming an unknown number of bytes. The caller is responsible for
+        * closing the stream.
+        *
+        * @param raw
+        *            input stream containing the raw file content.
+        * @return {@code true} if raw is likely to be CR-LF delimited text,
+        *         {@code false} otherwise
+        * @throws java.io.IOException
+        *             if input stream could not be read
+        * @since 5.3
+        */
+       public static boolean isCrLfText(InputStream raw) throws IOException {
+               byte[] buffer = new byte[FIRST_FEW_BYTES];
+               int cnt = 0;
+               while (cnt < buffer.length) {
+                       int n = raw.read(buffer, cnt, buffer.length - cnt);
+                       if (n == -1) {
+                               break;
+                       }
+                       cnt += n;
+               }
+               return isCrLfText(buffer, cnt);
+       }
+
+       /**
+        * Determine heuristically whether a byte array represents text content
+        * using CR-LF as line separator.
+        *
+        * @param raw
+        *            the raw file content.
+        * @param length
+        *            number of bytes in {@code raw} to evaluate.
+        * @return {@code true} if raw is likely to be CR-LF delimited text,
+        *         {@code false} otherwise
+        * @since 5.3
+        */
+       public static boolean isCrLfText(byte[] raw, int length) {
+               boolean has_crlf = false;
+               for (int ptr = 0; ptr < length - 1; ptr++) {
+                       if (raw[ptr] == '\0') {
+                               return false; // binary
+                       } else if (raw[ptr] == '\r' && raw[ptr + 1] == '\n') {
+                               has_crlf = true;
+                       }
+               }
+               return has_crlf;
+       }
+
        /**
         * Get the line delimiter for the first line.
         *
index 1fa1db5847265cbbdb14c8a7a021ad7f7db124a1..b768acd050cada577b3e3ed444f012d2929eb416 100644 (file)
@@ -74,6 +74,7 @@ import org.eclipse.jgit.diff.RawText;
 import org.eclipse.jgit.dircache.DirCacheEntry;
 import org.eclipse.jgit.dircache.DirCacheIterator;
 import org.eclipse.jgit.errors.CorruptObjectException;
+import org.eclipse.jgit.errors.LargeObjectException;
 import org.eclipse.jgit.errors.MissingObjectException;
 import org.eclipse.jgit.errors.NoWorkTreeException;
 import org.eclipse.jgit.ignore.FastIgnoreRule;
@@ -1471,9 +1472,18 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
        private EolStreamType getEolStreamType(OperationType opType)
                        throws IOException {
                if (eolStreamTypeHolder == null) {
-                       EolStreamType type=null;
+                       EolStreamType type = null;
                        if (state.walk != null) {
                                type = state.walk.getEolStreamType(opType);
+                               OperationType operationType = opType != null ? opType
+                                               : state.walk.getOperationType();
+                               if (OperationType.CHECKIN_OP.equals(operationType)
+                                               && EolStreamType.AUTO_LF.equals(type)
+                                               && hasCrLfInIndex(getDirCacheIterator())) {
+                                       // If text=auto (or core.autocrlf=true) and the file has
+                                       // already been committed with CR/LF, then don't convert.
+                                       type = EolStreamType.DIRECT;
+                               }
                        } else {
                                switch (getOptions().getAutoCRLF()) {
                                case FALSE:
@@ -1490,6 +1500,59 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
                return eolStreamTypeHolder.get();
        }
 
+       /**
+        * Determines whether the file was committed un-normalized. If the iterator
+        * points to a conflict entry, checks the "ours" version.
+        *
+        * @param dirCache
+        *            iterator pointing to the current entry for the file in the
+        *            index
+        * @return {@code true} if the file in the index is not binary and has CR/LF
+        *         line endings, {@code false} otherwise
+        */
+       private boolean hasCrLfInIndex(DirCacheIterator dirCache) {
+               if (dirCache == null) {
+                       return false;
+               }
+               // Read blob from index and check for CR/LF-delimited text.
+               DirCacheEntry entry = dirCache.getDirCacheEntry();
+               if (FileMode.REGULAR_FILE.equals(entry.getFileMode())) {
+                       ObjectId blobId = entry.getObjectId();
+                       if (entry.getStage() > 0
+                                       && entry.getStage() != DirCacheEntry.STAGE_2) {
+                               // Merge conflict: check ours (stage 2)
+                               byte[] name = entry.getRawPath();
+                               int i = 0;
+                               while (!dirCache.eof()) {
+                                       dirCache.next(1);
+                                       i++;
+                                       entry = dirCache.getDirCacheEntry();
+                                       if (!Arrays.equals(name, entry.getRawPath())) {
+                                               break;
+                                       }
+                                       if (entry.getStage() == DirCacheEntry.STAGE_2) {
+                                               blobId = entry.getObjectId();
+                                               break;
+                                       }
+                               }
+                               dirCache.back(i);
+                       }
+                       try (ObjectReader reader = repository.newObjectReader()) {
+                               ObjectLoader loader = reader.open(blobId, Constants.OBJ_BLOB);
+                               try {
+                                       return RawText.isCrLfText(loader.getCachedBytes());
+                               } catch (LargeObjectException e) {
+                                       try (InputStream in = loader.openStream()) {
+                                               return RawText.isCrLfText(in);
+                                       }
+                               }
+                       } catch (IOException e) {
+                               // Ignore and return false below
+                       }
+               }
+               return false;
+       }
+
        private boolean isDirectoryIgnored(String pathRel) throws IOException {
                final int pOff = 0 < pathOffset ? pathOffset - 1 : pathOffset;
                final String base = TreeWalk.pathOf(this.path, 0, pOff);
index 08377e6be04be59a1a9a1805e16d04e1fe00312c..4c60862bf4c46e0dd4032e3510a7aa3c4a6a5eb3 100644 (file)
@@ -144,9 +144,18 @@ public class AutoCRLFInputStream extends InputStream {
        }
 
        private boolean fillBuffer() throws IOException {
-               cnt = in.read(buf, 0, buf.length);
-               if (cnt < 1)
+               cnt = 0;
+               while (cnt < buf.length) {
+                       int n = in.read(buf, cnt, buf.length - cnt);
+                       if (n < 0) {
+                               break;
+                       }
+                       cnt += n;
+               }
+               if (cnt < 1) {
+                       cnt = -1;
                        return false;
+               }
                if (detectBinary) {
                        isBinary = RawText.isBinary(buf, cnt);
                        detectBinary = false;
index ff28161a520b19bf7073c749a68b9d320c57a42f..280cf7e28ce22ad1f5ca4319271c3724e74ffdc2 100644 (file)
@@ -189,9 +189,18 @@ public class AutoLFInputStream extends InputStream {
        }
 
        private boolean fillBuffer() throws IOException {
-               cnt = in.read(buf, 0, buf.length);
-               if (cnt < 1)
+               cnt = 0;
+               while (cnt < buf.length) {
+                       int n = in.read(buf, cnt, buf.length - cnt);
+                       if (n < 0) {
+                               break;
+                       }
+                       cnt += n;
+               }
+               if (cnt < 1) {
+                       cnt = -1;
                        return false;
+               }
                if (detectBinary) {
                        isBinary = RawText.isBinary(buf, cnt);
                        detectBinary = false;