Browse Source

Enable large file support

Allow adding files with size over 2 GB. The drawback is that the test
for huge file support adds roughly 10 minutes of execution time.
For that reason we @Ignore the test in the standard test execution.

Change-Id: I5788e8009899203b346f353297166825b3744575
tags/v2.0.0.201206130900-r
Robin Rosenberg 12 years ago
parent
commit
9c5b31703f

+ 236
- 0
org.eclipse.jgit.test/tst/org/eclipse/jgit/api/HugeFileTest.java View File

@@ -0,0 +1,236 @@
/*
* Copyright (C) 2012, Robin Rosenberg <robin.rosenberg@dewire.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.api;

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.RandomAccessFile;
import java.util.Arrays;
import java.util.Collection;

import org.eclipse.jgit.api.ResetCommand.ResetType;
import org.eclipse.jgit.lib.RepositoryTestCase;
import org.junit.Ignore;
import org.junit.Test;

/**
 * Exercises add, status, commit and hard reset on a single work tree file
 * whose size exceeds what fits in a signed (and even an unsigned) 32-bit
 * integer.
 * <p>
 * The expected index states below show the cached length as the on-disk size
 * truncated to 32 bits, which is why one of them is negative.
 * <p>
 * The test is ignored by default because of its run time; the
 * {@link #measure(String)} calls print how long each step took when it is run
 * by hand.
 */
public class HugeFileTest extends RepositoryTestCase {

	/** Path of the only file the test touches, relative to the work tree. */
	private static final String PATH = "a.txt";

	/** Initial file size: 4 GiB + 128 MiB, i.e. larger than 2^32 bytes. */
	private static final long HUGE_LENGTH = 4429185024L;

	/** {@link #HUGE_LENGTH} modulo 2^32 (128 MiB). */
	private static final long HUGE_LENGTH_MOD_4G = 134217728L;

	/**
	 * A size between 2 GiB and 4 GiB; cast to {@code int} it yields
	 * {@code -865782272}.
	 */
	private static final long NEGATIVE_AS_INT_LENGTH = 3429185024L;

	/** Expected status collection content when the file is reported. */
	private static final Collection<String> TEST_FILE = Arrays.asList(PATH);

	/** Expected status collection content when nothing is reported. */
	private static final Collection<String> NOTHING = Arrays
			.<String> asList();

	/** Time stamp of the previous {@link #measure(String)} call. */
	private long lastt = System.currentTimeMillis();

	/**
	 * Print the time elapsed since the previous call (or since the test
	 * instance was created) and restart the clock.
	 *
	 * @param name
	 *            label of the step that just finished
	 */
	private void measure(String name) {
		long c = System.currentTimeMillis();
		System.out.println(name + ", dt=" + (c - lastt) / 1000.0 + "s");
		lastt = c;
	}

	@Ignore("Test takes way too long (~10 minutes) to be part of the standard suite")
	@Test
	public void testAddHugeFile() throws Exception {
		measure("Commencing test");
		File file = new File(db.getWorkTree(), PATH);
		setFileLength(file, HUGE_LENGTH);
		measure("Created file");
		Git git = new Git(db);

		git.add().addFilepattern(PATH).call();
		measure("Added file");
		assertEquals(
				"[a.txt, mode:100644, length:134217728, sha1:b8cfba97c2b962a44f080b3ca4e03b3204b6a350]",
				indexState(LENGTH | CONTENT_ID));

		Status status = git.status().call();
		measure("Status after add");
		assertStatus(status, TEST_FILE, NOTHING);

		// Rewrite the first byte with the value it is expected to hold
		// already; only the modification time stamp should change.
		overwriteFirstByte(file, 0);

		status = git.status().call();
		measure("Status after non-modifying update");
		assertStatus(status, TEST_FILE, NOTHING);

		// Change something
		overwriteFirstByte(file, 'a');

		status = git.status().call();
		measure("Status after modifying update");
		assertStatus(status, TEST_FILE, TEST_FILE);

		// Shrink the file to its original size modulo 4 GiB, so the size
		// matches the length cached in the index again. The content is not
		// the one that was added, so the file must still show as modified.
		resizeAndOverwriteFirstByte(file, HUGE_LENGTH_MOD_4G, 0);

		status = git.status().call();
		measure("Status after truncating update");
		assertStatus(status, TEST_FILE, TEST_FILE);

		// Change something
		overwriteFirstByte(file, 'a');

		status = git.status().call();
		measure("Status after modifying and truncating update");
		assertStatus(status, TEST_FILE, TEST_FILE);

		// Resize so that the length stored in the index entry becomes a
		// negative int.
		resizeAndOverwriteFirstByte(file, NEGATIVE_AS_INT_LENGTH, 0);

		git.add().addFilepattern(PATH).call();
		measure("Added truncated file");
		assertEquals(
				"[a.txt, mode:100644, length:-865782272, sha1:59b3282f8f59f22d953df956ad3511bf2dc660fd]",
				indexState(LENGTH | CONTENT_ID));

		status = git.status().call();
		measure("Status after status on truncated file");
		assertStatus(status, TEST_FILE, NOTHING);

		// Change something
		overwriteFirstByte(file, 'a');

		status = git.status().call();
		measure("Status after modifying and truncating update");
		assertStatus(status, TEST_FILE, TEST_FILE);

		// The commit records the staged content; the work tree still
		// carries the later change.
		git.commit().setMessage("make a commit").call();
		measure("After commit");
		status = git.status().call();
		measure("After status after commit");
		assertStatus(status, NOTHING, TEST_FILE);

		git.reset().setMode(ResetType.HARD).call();
		measure("After reset --hard");
		assertEquals(
				"[a.txt, mode:100644, length:-865782272, sha1:59b3282f8f59f22d953df956ad3511bf2dc660fd]",
				indexState(LENGTH | CONTENT_ID));

		status = git.status().call();
		measure("Status after hard reset");
		assertStatus(status, NOTHING, NOTHING);
	}

	/**
	 * Set the size of a file, creating it if necessary.
	 *
	 * @param file
	 *            the file to resize
	 * @param length
	 *            the new size in bytes
	 * @throws Exception
	 *             if the file cannot be opened or resized
	 */
	private static void setFileLength(File file, long length)
			throws Exception {
		RandomAccessFile rf = new RandomAccessFile(file, "rw");
		try {
			rf.setLength(length);
		} finally {
			// Release the handle even when resizing fails.
			rf.close();
		}
	}

	/**
	 * Overwrite the byte at offset 0 of a file.
	 *
	 * @param file
	 *            the file to update
	 * @param b
	 *            the byte value to store
	 * @throws Exception
	 *             if the file cannot be opened or written
	 */
	private static void overwriteFirstByte(File file, int b)
			throws Exception {
		RandomAccessFile rf = new RandomAccessFile(file, "rw");
		try {
			rf.write(b);
		} finally {
			rf.close();
		}
	}

	/**
	 * Resize a file and then overwrite the byte at offset 0, using a single
	 * open file handle.
	 *
	 * @param file
	 *            the file to update
	 * @param length
	 *            the new size in bytes
	 * @param b
	 *            the byte value to store at offset 0
	 * @throws Exception
	 *             if the file cannot be opened, resized or written
	 */
	private static void resizeAndOverwriteFirstByte(File file, long length,
			int b) throws Exception {
		RandomAccessFile rf = new RandomAccessFile(file, "rw");
		try {
			rf.setLength(length);
			rf.write(b);
		} finally {
			rf.close();
		}
	}

	/**
	 * Assert the added and modified sets of a status and that every other
	 * set checked here (changed, conflicting, missing, removed, untracked) is
	 * empty.
	 *
	 * @param status
	 *            the status to check
	 * @param added
	 *            expected content of {@link Status#getAdded()}
	 * @param modified
	 *            expected content of {@link Status#getModified()}
	 */
	private void assertStatus(Status status, Collection<?> added,
			Collection<?> modified) {
		assertExpected(added, status.getAdded());
		assertEquals(0, status.getChanged().size());
		assertEquals(0, status.getConflicting().size());
		assertEquals(0, status.getMissing().size());
		assertExpected(modified, status.getModified());
		assertEquals(0, status.getRemoved().size());
		assertEquals(0, status.getUntracked().size());
	}

	/**
	 * Assert that a collection is empty when nothing is expected, or equal
	 * to the expected collection otherwise.
	 *
	 * @param expected
	 *            the expected content
	 * @param actual
	 *            the content found
	 */
	private void assertExpected(Collection<?> expected, Collection<?> actual) {
		if (expected.isEmpty())
			assertEquals(0, actual.size());
		else
			assertCollectionEquals(expected, actual);
	}

	/**
	 * Compare two collections through their string representations, which
	 * makes the comparison independent of the collection types.
	 *
	 * @param asList
	 *            the expected content
	 * @param added
	 *            the content found
	 */
	private void assertCollectionEquals(Collection<?> asList,
			Collection<?> added) {
		assertEquals(asList.toString(), added.toString());
	}

}

+ 7
- 8
org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java View File

@@ -525,7 +525,7 @@ public class DirCacheEntry {
}

/**
* Get the cached size (in bytes) of this file.
* Get the cached size (mod 4 GB) (in bytes) of this file.
* <p>
* One of the indicators that the file has been modified by an application
* changing the working tree is if the size of the file (in bytes) differs
@@ -534,6 +534,10 @@ public class DirCacheEntry {
* Note that this is the length of the file in the working directory, which
* may differ from the size of the decompressed blob if work tree filters
* are being used, such as LF<->CRLF conversion.
* <p>
* Note also that for very large files, this is the size of the on-disk file
* truncated to 32 bits, i.e. modulo 4294967296. If that value is larger
* than 2GB, it will appear negative.
*
* @return cached size of the working directory file, in bytes.
*/
@@ -545,7 +549,8 @@ public class DirCacheEntry {
* Set the cached size (in bytes) of this file.
*
* @param sz
* new cached size of the file, as bytes.
* new cached size of the file, as bytes. If the file is larger
* than 2G, cast it to (int) before calling this method.
*/
public void setLength(final int sz) {
NB.encodeInt32(info, infoOffset + P_SIZE, sz);
@@ -556,15 +561,9 @@ public class DirCacheEntry {
*
* @param sz
* new cached size of the file, as bytes.
* @throws IllegalArgumentException
* if the size exceeds the 2 GiB barrier imposed by current file
* format limitations.
*/
@SuppressWarnings("boxing")
public void setLength(final long sz) {
if (Integer.MAX_VALUE <= sz)
throw new IllegalArgumentException(MessageFormat.format(JGitText
.get().sizeExceeds2GB, getPathString(), sz));
setLength((int) sz);
}


+ 1
- 1
org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java View File

@@ -711,7 +711,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
if (entry.isUpdateNeeded())
return MetadataDiff.DIFFER_BY_METADATA;

if (!entry.isSmudged() && (getEntryLength() != entry.getLength()))
if (!entry.isSmudged() && entry.getLength() != (int) getEntryLength())
return MetadataDiff.DIFFER_BY_METADATA;

// Determine difference in mode-bits of file and index-entry. In the

Loading…
Cancel
Save