source.dussan.org Git - jgit.git/commitdiff
Enable large file support 89/5589/4
author: Robin Rosenberg <robin.rosenberg@dewire.com>
Mon, 16 Apr 2012 22:18:10 +0000 (00:18 +0200)
committer: Robin Rosenberg <robin.rosenberg@dewire.com>
Wed, 18 Apr 2012 19:59:15 +0000 (21:59 +0200)
Allow adding files larger than 2 GB. The drawback is that the test
for huge file support adds roughly 10 minutes of execution time.
For that reason we @Ignore the test in the standard test execution.

Change-Id: I5788e8009899203b346f353297166825b3744575

org.eclipse.jgit.test/tst/org/eclipse/jgit/api/HugeFileTest.java [new file with mode: 0644]
org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java
org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java

diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/HugeFileTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/HugeFileTest.java
new file mode 100644 (file)
index 0000000..10b6ba4
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2012, Robin Rosenberg <robin.rosenberg@dewire.com>
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.eclipse.jgit.api;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.RandomAccessFile;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.eclipse.jgit.api.ResetCommand.ResetType;
+import org.eclipse.jgit.lib.RepositoryTestCase;
+import org.junit.Ignore;
+import org.junit.Test;
+
+public class HugeFileTest extends RepositoryTestCase {
+
+       private long t = System.currentTimeMillis();
+
+       private long lastt = t;
+
+       private void measure(String name) {
+               long c = System.currentTimeMillis();
+               System.out.println(name + ", dt=" + (c - lastt) / 1000.0 + "s");
+               lastt = c;
+       }
+
+       @Ignore("Test takes way too long (~10 minutes) to be part of the standard suite")
+       @Test
+       public void testAddHugeFile() throws Exception {
+               measure("Commencing test");
+               File file = new File(db.getWorkTree(), "a.txt");
+               RandomAccessFile rf = new RandomAccessFile(file, "rw");
+               rf.setLength(4429185024L);
+               rf.close();
+               measure("Created file");
+               Git git = new Git(db);
+
+               git.add().addFilepattern("a.txt").call();
+               measure("Added file");
+               assertEquals(
+                               "[a.txt, mode:100644, length:134217728, sha1:b8cfba97c2b962a44f080b3ca4e03b3204b6a350]",
+                               indexState(LENGTH | CONTENT_ID));
+
+               Status status = git.status().call();
+               measure("Status after add");
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getAdded());
+               assertEquals(0, status.getChanged().size());
+               assertEquals(0, status.getConflicting().size());
+               assertEquals(0, status.getMissing().size());
+               assertEquals(0, status.getModified().size());
+               assertEquals(0, status.getRemoved().size());
+               assertEquals(0, status.getUntracked().size());
+
+               // Does not change the content, only the modification timestamp
+               rf = new RandomAccessFile(file, "rw");
+               rf.write(0);
+               rf.close();
+
+               status = git.status().call();
+               measure("Status after non-modifying update");
+
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getAdded());
+               assertEquals(0, status.getChanged().size());
+               assertEquals(0, status.getConflicting().size());
+               assertEquals(0, status.getMissing().size());
+               assertEquals(0, status.getModified().size());
+               assertEquals(0, status.getRemoved().size());
+               assertEquals(0, status.getUntracked().size());
+
+               // Change something
+               rf = new RandomAccessFile(file, "rw");
+               rf.write('a');
+               rf.close();
+
+               status = git.status().call();
+               measure("Status after modifying update");
+
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getAdded());
+               assertEquals(0, status.getChanged().size());
+               assertEquals(0, status.getConflicting().size());
+               assertEquals(0, status.getMissing().size());
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getModified());
+               assertEquals(0, status.getRemoved().size());
+               assertEquals(0, status.getUntracked().size());
+
+               // Truncate to the original length mod 4 GiB so the cached index length matches again
+               rf = new RandomAccessFile(file, "rw");
+               rf.setLength(134217728L);
+               rf.write(0);
+               rf.close();
+
+               status = git.status().call();
+               measure("Status after truncating update");
+
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getAdded());
+               assertEquals(0, status.getChanged().size());
+               assertEquals(0, status.getConflicting().size());
+               assertEquals(0, status.getMissing().size());
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getModified());
+               assertEquals(0, status.getRemoved().size());
+               assertEquals(0, status.getUntracked().size());
+
+               // Change something
+               rf = new RandomAccessFile(file, "rw");
+               rf.write('a');
+               rf.close();
+
+               status = git.status().call();
+               measure("Status after modifying and truncating update");
+
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getAdded());
+               assertEquals(0, status.getChanged().size());
+               assertEquals(0, status.getConflicting().size());
+               assertEquals(0, status.getMissing().size());
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getModified());
+               assertEquals(0, status.getRemoved().size());
+               assertEquals(0, status.getUntracked().size());
+
+               // Resize to a length that becomes a negative int when stored in the index entry
+               rf = new RandomAccessFile(file, "rw");
+               rf.setLength(3429185024L);
+               rf.write(0);
+               rf.close();
+
+               git.add().addFilepattern("a.txt").call();
+               measure("Added truncated file");
+               assertEquals(
+                               "[a.txt, mode:100644, length:-865782272, sha1:59b3282f8f59f22d953df956ad3511bf2dc660fd]",
+                               indexState(LENGTH | CONTENT_ID));
+
+               status = git.status().call();
+               measure("Status after status on truncated file");
+
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getAdded());
+               assertEquals(0, status.getChanged().size());
+               assertEquals(0, status.getConflicting().size());
+               assertEquals(0, status.getMissing().size());
+               assertEquals(0, status.getModified().size());
+               assertEquals(0, status.getRemoved().size());
+               assertEquals(0, status.getUntracked().size());
+
+               // Change something
+               rf = new RandomAccessFile(file, "rw");
+               rf.write('a');
+               rf.close();
+
+               status = git.status().call();
+               measure("Status after modifying and truncating update");
+
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getAdded());
+               assertEquals(0, status.getChanged().size());
+               assertEquals(0, status.getConflicting().size());
+               assertEquals(0, status.getMissing().size());
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getModified());
+               assertEquals(0, status.getRemoved().size());
+               assertEquals(0, status.getUntracked().size());
+
+               git.commit().setMessage("make a commit").call();
+               measure("After commit");
+               status = git.status().call();
+               measure("After status after commit");
+
+               assertEquals(0, status.getAdded().size());
+               assertEquals(0, status.getChanged().size());
+               assertEquals(0, status.getConflicting().size());
+               assertEquals(0, status.getMissing().size());
+               assertCollectionEquals(Arrays.asList("a.txt"), status.getModified());
+               assertEquals(0, status.getRemoved().size());
+               assertEquals(0, status.getUntracked().size());
+
+               git.reset().setMode(ResetType.HARD).call();
+               measure("After reset --hard");
+               assertEquals(
+                               "[a.txt, mode:100644, length:-865782272, sha1:59b3282f8f59f22d953df956ad3511bf2dc660fd]",
+                               indexState(LENGTH | CONTENT_ID));
+
+               status = git.status().call();
+               measure("Status after hard reset");
+
+               assertEquals(0, status.getAdded().size());
+               assertEquals(0, status.getChanged().size());
+               assertEquals(0, status.getConflicting().size());
+               assertEquals(0, status.getMissing().size());
+               assertEquals(0, status.getModified().size());
+               assertEquals(0, status.getRemoved().size());
+               assertEquals(0, status.getUntracked().size());
+       }
+
+       private void assertCollectionEquals(Collection<?> asList,
+                       Collection<?> added) {
+               assertEquals(asList.toString(), added.toString());
+       }
+
+}
index 85df340d37cb763eaf947f33129737dfebabc485..08cc9a8d52f5d4475273c426fc3465903c55cd93 100644 (file)
@@ -525,7 +525,7 @@ public class DirCacheEntry {
        }
 
        /**
-        * Get the cached size (in bytes) of this file.
+        * Get the cached size (in bytes, modulo 4 GiB) of this file.
         * <p>
         * One of the indicators that the file has been modified by an application
         * changing the working tree is if the size of the file (in bytes) differs
@@ -534,6 +534,10 @@ public class DirCacheEntry {
         * Note that this is the length of the file in the working directory, which
         * may differ from the size of the decompressed blob if work tree filters
         * are being used, such as LF<->CRLF conversion.
+        * <p>
+        * Note also that for very large files, this is the size of the on-disk file
+        * truncated to 32 bits, i.e. modulo 4294967296. If that value is 2 GiB
+        * (2147483648) or larger, it will appear negative.
         *
         * @return cached size of the working directory file, in bytes.
         */
@@ -545,7 +549,8 @@ public class DirCacheEntry {
         * Set the cached size (in bytes) of this file.
         *
         * @param sz
-        *            new cached size of the file, as bytes.
+        *            new cached size of the file, as bytes. If the size is 2 GiB
+        *            or larger, cast it to (int) before calling this method.
         */
        public void setLength(final int sz) {
                NB.encodeInt32(info, infoOffset + P_SIZE, sz);
@@ -556,15 +561,9 @@ public class DirCacheEntry {
         *
         * @param sz
         *            new cached size of the file, as bytes.
-        * @throws IllegalArgumentException
-        *             if the size exceeds the 2 GiB barrier imposed by current file
-        *             format limitations.
         */
        @SuppressWarnings("boxing")
        public void setLength(final long sz) {
-               if (Integer.MAX_VALUE <= sz)
-                       throw new IllegalArgumentException(MessageFormat.format(JGitText
-                                       .get().sizeExceeds2GB, getPathString(), sz));
                setLength((int) sz);
        }
 
index 027101bf881eff8925a34364e5551e65093b444e..54eaeb9dfb03e744cbf86939a96b8965ce338920 100644 (file)
@@ -711,7 +711,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
                if (entry.isUpdateNeeded())
                        return MetadataDiff.DIFFER_BY_METADATA;
 
-               if (!entry.isSmudged() && (getEntryLength() != entry.getLength()))
+               if (!entry.isSmudged() && entry.getLength() != (int) getEntryLength())
                        return MetadataDiff.DIFFER_BY_METADATA;
 
                // Determine difference in mode-bits of file and index-entry. In the