diff options
3 files changed, 260 insertions, 10 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/HugeFileTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/HugeFileTest.java
new file mode 100644
index 0000000000..10b6ba48b3
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/HugeFileTest.java
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2012, Robin Rosenberg <robin.rosenberg@dewire.com>
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.eclipse.jgit.api;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.eclipse.jgit.api.ResetCommand.ResetType;
+import org.eclipse.jgit.lib.RepositoryTestCase;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Exercises add, status, commit and hard reset on a work tree file whose
+ * size does not fit into the 32-bit length field of an index entry.
+ */
+public class HugeFileTest extends RepositoryTestCase {
+
+	/** Time of the previous {@link #measure(String)} call, in milliseconds. */
+	private long lastt = System.currentTimeMillis();
+
+	/**
+	 * Print the time elapsed since the previous measurement.
+	 *
+	 * @param name
+	 *            label of the step that just completed.
+	 */
+	private void measure(String name) {
+		long c = System.currentTimeMillis();
+		System.out.println(name + ", dt=" + (c - lastt) / 1000.0 + "s");
+		lastt = c;
+	}
+
+	/**
+	 * Add, modify, truncate, commit and reset a file larger than 4 GiB and
+	 * check the index entry and the reported status after every step.
+	 */
+	@Ignore("Test takes way too long (~10 minutes) to be part of the standard suite")
+	@Test
+	public void testAddHugeFile() throws Exception {
+		measure("Commencing test");
+		File file = new File(db.getWorkTree(), "a.txt");
+		resize(file, 4429185024L);
+		measure("Created file");
+		Git git = new Git(db);
+
+		git.add().addFilepattern("a.txt").call();
+		measure("Added file");
+		// The index keeps only the low 32 bits: 4429185024 mod 2^32
+		assertEquals(
+				"[a.txt, mode:100644, length:134217728, sha1:b8cfba97c2b962a44f080b3ca4e03b3204b6a350]",
+				indexState(LENGTH | CONTENT_ID));
+
+		Status status = git.status().call();
+		measure("Status after add");
+		assertStatus(status, true, false);
+
+		// Overwrite the first byte with 0: does not change the content, only
+		// the modification timestamp
+		writeFirstByte(file, 0);
+
+		status = git.status().call();
+		measure("Status after non-modifying update");
+		assertStatus(status, true, false);
+
+		// Change something
+		writeFirstByte(file, 'a');
+
+		status = git.status().call();
+		measure("Status after modifying update");
+		assertStatus(status, true, true);
+
+		// Truncate to the length cached in the index (size mod 4 GiB) and
+		// restore the first byte; the file must still be reported modified
+		resize(file, 134217728L);
+		writeFirstByte(file, 0);
+
+		status = git.status().call();
+		measure("Status after truncating update");
+		assertStatus(status, true, true);
+
+		// Change something
+		writeFirstByte(file, 'a');
+
+		status = git.status().call();
+		measure("Status after modifying and truncating update");
+		assertStatus(status, true, true);
+
+		// Truncate to a length between 2 GiB and 4 GiB, so that the cached
+		// 32-bit entry length becomes a negative int
+		resize(file, 3429185024L);
+		writeFirstByte(file, 0);
+
+		git.add().addFilepattern("a.txt").call();
+		measure("Added truncated file");
+		assertEquals(
+				"[a.txt, mode:100644, length:-865782272, sha1:59b3282f8f59f22d953df956ad3511bf2dc660fd]",
+				indexState(LENGTH | CONTENT_ID));
+
+		status = git.status().call();
+		measure("Status after status on truncated file");
+		assertStatus(status, true, false);
+
+		// Change something
+		writeFirstByte(file, 'a');
+
+		status = git.status().call();
+		measure("Status after modifying and truncating update");
+		assertStatus(status, true, true);
+
+		git.commit().setMessage("make a commit").call();
+		measure("After commit");
+		status = git.status().call();
+		measure("After status after commit");
+		assertStatus(status, false, true);
+
+		git.reset().setMode(ResetType.HARD).call();
+		measure("After reset --hard");
+		assertEquals(
+				"[a.txt, mode:100644, length:-865782272, sha1:59b3282f8f59f22d953df956ad3511bf2dc660fd]",
+				indexState(LENGTH | CONTENT_ID));
+
+		status = git.status().call();
+		measure("Status after hard reset");
+		assertStatus(status, false, false);
+	}
+
+	/**
+	 * Set the length of a file, releasing the file handle even on failure.
+	 *
+	 * @param f
+	 *            file to extend or truncate.
+	 * @param newLength
+	 *            desired length in bytes.
+	 * @throws IOException
+	 *             the file could not be opened or resized.
+	 */
+	private static void resize(File f, long newLength) throws IOException {
+		RandomAccessFile rf = new RandomAccessFile(f, "rw");
+		try {
+			rf.setLength(newLength);
+		} finally {
+			rf.close();
+		}
+	}
+
+	/**
+	 * Overwrite the first byte of a file, releasing the file handle even on
+	 * failure.
+	 *
+	 * @param f
+	 *            file to write to.
+	 * @param b
+	 *            value to store at offset 0.
+	 * @throws IOException
+	 *             the file could not be opened or written.
+	 */
+	private static void writeFirstByte(File f, int b) throws IOException {
+		RandomAccessFile rf = new RandomAccessFile(f, "rw");
+		try {
+			rf.write(b);
+		} finally {
+			rf.close();
+		}
+	}
+
+	/**
+	 * Check that only the expected categories of a status report a path.
+	 *
+	 * @param status
+	 *            status to verify.
+	 * @param expectAdded
+	 *            true if exactly a.txt must be reported as added, false if
+	 *            no path may be reported as added.
+	 * @param expectModified
+	 *            true if exactly a.txt must be reported as modified, false
+	 *            if no path may be reported as modified.
+	 */
+	private void assertStatus(Status status, boolean expectAdded,
+			boolean expectModified) {
+		assertPaths(expectAdded, status.getAdded());
+		assertEquals(0, status.getChanged().size());
+		assertEquals(0, status.getConflicting().size());
+		assertEquals(0, status.getMissing().size());
+		assertPaths(expectModified, status.getModified());
+		assertEquals(0, status.getRemoved().size());
+		assertEquals(0, status.getUntracked().size());
+	}
+
+	/** Assert that {@code actual} is exactly [a.txt], or empty if not expected. */
+	private void assertPaths(boolean expectFile, Collection<?> actual) {
+		if (expectFile)
+			assertCollectionEquals(Arrays.asList("a.txt"), actual);
+		else
+			assertEquals(0, actual.size());
+	}
+
+	/** Compare two collections by their string representation. */
+	private void assertCollectionEquals(Collection<?> asList,
+			Collection<?> added) {
+		assertEquals(asList.toString(), added.toString());
+	}
+
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java
index 85df340d37..08cc9a8d52 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java
@@ -525,7 +525,7 @@ public class DirCacheEntry {
 	}
 
 	/**
-	 * Get the cached size (in bytes) of this file.
+	 * Get the cached size (mod 4 GiB) (in bytes) of this file.
 	 * <p>
 	 * One of the indicators that the file has been modified by an application
 	 * changing the working tree is if the size of the file (in bytes) differs
@@ -534,6 +534,10 @@ public class DirCacheEntry {
 	 * Note that this is the length of the file in the working directory, which
 	 * may differ from the size of the decompressed blob if work tree filters
 	 * are being used, such as LF<->CRLF conversion.
+	 * <p>
+	 * Note also that for very large files, this is the size of the on-disk file
+	 * truncated to 32 bits, i.e. modulo 4294967296. If that value is 2 GiB or
+	 * larger, it will appear negative.
 	 *
 	 * @return cached size of the working directory file, in bytes.
 	 */
@@ -545,7 +549,8 @@ public class DirCacheEntry {
 	 * Set the cached size (in bytes) of this file.
 	 *
 	 * @param sz
-	 *            new cached size of the file, as bytes.
+	 *            new cached size of the file, as bytes. If the file is larger
+	 *            than 2 GiB, cast it to (int) before calling this method.
 	 */
 	public void setLength(final int sz) {
 		NB.encodeInt32(info, infoOffset + P_SIZE, sz);
@@ -556,15 +561,9 @@ public class DirCacheEntry {
 	 *
 	 * @param sz
 	 *            new cached size of the file, as bytes.
-	 * @throws IllegalArgumentException
-	 *             if the size exceeds the 2 GiB barrier imposed by current file
-	 *             format limitations.
+	 *            Values that do not fit in 32 bits are truncated (mod 4 GiB).
 	 */
-	@SuppressWarnings("boxing")
 	public void setLength(final long sz) {
-		if (Integer.MAX_VALUE <= sz)
-			throw new IllegalArgumentException(MessageFormat.format(JGitText
-					.get().sizeExceeds2GB, getPathString(), sz));
 		setLength((int) sz);
 	}
 
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java
index 027101bf88..54eaeb9dfb 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java
@@ -711,7 +711,9 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
 		if (entry.isUpdateNeeded())
 			return MetadataDiff.DIFFER_BY_METADATA;
 
-		if (!entry.isSmudged() && (getEntryLength() != entry.getLength()))
+		// The index caches only the low 32 bits of the file size, so compare
+		// the work tree length modulo 4 GiB.
+		if (!entry.isSmudged() && entry.getLength() != (int) getEntryLength())
 			return MetadataDiff.DIFFER_BY_METADATA;
 
 		// Determine difference in mode-bits of file and index-entry. In the