From bd970007be5c07375b9915033992de9a167e8196 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Thu, 10 Mar 2011 15:42:32 -0800 Subject: ObjectIdOwnerMap: More lightweight map for ObjectIds OwnerMap is about 200 ms faster than SubclassMap, more friendly to the GC, and uses less storage: testing the "Counting objects" part of PackWriter on 1886362 objects: ObjectIdSubclassMap: load factor 50% table: 4194304 (wasted 2307942) ms spent 36998 36009 34795 34703 34941 35070 34284 34511 34638 34256 ms avg 34800 (last 9 runs) ObjectIdOwnerMap: load factor 100% table: 2097152 (wasted 210790) directory: 1024 ms spent 36842 35112 34922 34703 34580 34782 34165 34662 34314 34140 ms avg 34597 (last 9 runs) The major difference with OwnerMap is entries must extend from ObjectIdOwnerMap.Entry, where the OwnerMap has injected its own private "next" field into each object. This allows the OwnerMap to use a singly linked list for chaining collisions within a bucket. By putting collisions in a linked list, we gain the entire table back for the SHA-1 bits to index their own "private" slot. Unfortunately this means that each object can appear in at most ONE OwnerMap, as there is only one "next" field within the object instance to thread into the map. For types that are very object map heavy like RevWalk (entity RevObject) and PackWriter (entity ObjectToPack) this is sufficient, these entity types are only put into one map by their container. By introducing a new map type, we don't break existing applications that might be trying to use ObjectIdSubclassMap to track RevCommits they obtained from a RevWalk. The OwnerMap uses less memory. Each object uses 1 reference more (so we're up 1,886,362 references), but the table is 1/2 the size (2^20 rather than 2^21). The table itself wastes only 210,790 slots, rather than 2,307,942. So OwnerMap is wasting 200k fewer references. OwnerMap is more friendly to the GC, because it hardly ever generates garbage. 
As the map reaches its 100% load factor target, it doubles in size by allocating additional segment arrays of 2048 entries. (So the first grow allocates 1 segment, second 2 segments, third 4 segments, etc.) These segments are hooked into the pre-allocated directory of 1024 spaces. This permits the map to grow to 2 million objects before the directory itself has to grow. By using segments of 2048 entries, we are asking the GC to acquire 8,204 bytes in a 32 bit JVM. This is easier to satisfy than 2,307,942 bytes (for the 512k table that is just an intermediate step in the SubclassMap). By reusing the previously allocated segments (they are re-hashed in-place) we don't release any memory during a table grow. When the directory grows, it does so by discarding the old one and using one that is 4x larger (so the directory goes to 4096 entries on its first grow). A directory of size 4096 can handle up to 8 million objects. The second directory grow (16384) goes to 33 million objects. At that point we're starting to really push the limits of the JVM heap, but at least it's many small arrays. Previously SubclassMap would need a table of 67108864 entries to handle that object count, which needs a single contiguous allocation of 256 MiB. That's hard to come by in a 32 bit JVM. Instead OwnerMap uses 8192 arrays of about 8 KiB each. This is much easier to fit into a fragmented heap. Change-Id: Ia4acf5cfbf7e9b71bc7faa0db9060f6a969c0c50 Signed-off-by: Shawn O.
Pearce --- .../org/eclipse/jgit/lib/ObjectIdOwnerMapTest.java | 217 +++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectIdOwnerMapTest.java (limited to 'org.eclipse.jgit.test/tst/org/eclipse/jgit') diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectIdOwnerMapTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectIdOwnerMapTest.java new file mode 100644 index 0000000000..a36a5e9429 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectIdOwnerMapTest.java @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2011, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.lib;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link ObjectIdOwnerMap}: empty-map behaviour, add/get/
+ * contains, clear, addIfAbsent, insertion of many objects, and iteration.
+ */
+public class ObjectIdOwnerMapTest {
+	/**
+	 * Scratch buffer rewritten by every call to {@link #id(int)}; the value
+	 * returned by an earlier call is therefore overwritten by a later one.
+	 */
+	private MutableObjectId idBuf;
+
+	// id_a31 and id_b31 share byte 0 (31) and differ only in byte 3.
+	// NOTE(review): presumably chosen so both land in the same bucket and
+	// exercise collision chaining -- confirm against the map's hash.
+	private SubId id_1, id_2, id_3, id_a31, id_b31;
+
+	/** Builds a fresh id buffer and the fixture entries before each test. */
+	@Before
+	public void init() {
+		idBuf = new MutableObjectId();
+		id_1 = new SubId(id(1));
+		id_2 = new SubId(id(2));
+		id_3 = new SubId(id(3));
+		id_a31 = new SubId(id(31));
+		id_b31 = new SubId(id((1 << 8) + 31));
+	}
+
+	/** A new map is empty, has size 0, iterates nothing and contains nothing. */
+	@Test
+	public void testEmptyMap() {
+		ObjectIdOwnerMap<SubId> m = new ObjectIdOwnerMap<SubId>();
+		assertTrue(m.isEmpty());
+		assertEquals(0, m.size());
+
+		Iterator<SubId> i = m.iterator();
+		assertNotNull(i);
+		assertFalse(i.hasNext());
+
+		assertFalse(m.contains(id(1)));
+	}
+
+	/**
+	 * Added entries are returned by identity when looked up by the entry
+	 * itself, by the scratch buffer, or by a copy of the id.
+	 */
+	@Test
+	public void testAddGetAndContains() {
+		ObjectIdOwnerMap<SubId> m = new ObjectIdOwnerMap<SubId>();
+		m.add(id_1);
+		m.add(id_2);
+		m.add(id_3);
+		m.add(id_a31);
+		m.add(id_b31);
+		assertFalse(m.isEmpty());
+		assertEquals(5, m.size());
+
+		assertSame(id_1, m.get(id_1));
+		assertSame(id_1, m.get(id(1)));
+		assertSame(id_1, m.get(id(1).copy()));
+		assertSame(id_2, m.get(id(2).copy()));
+		assertSame(id_3, m.get(id(3).copy()));
+		assertSame(id_a31, m.get(id(31).copy()));
+		assertSame(id_b31, m.get(id_b31.copy()));
+
+		assertTrue(m.contains(id_1));
+	}
+
+	/** After clear() the map behaves exactly like a newly created one. */
+	@Test
+	public void testClear() {
+		ObjectIdOwnerMap<SubId> m = new ObjectIdOwnerMap<SubId>();
+
+		m.add(id_1);
+		assertSame(id_1, m.get(id_1));
+
+		m.clear();
+		assertTrue(m.isEmpty());
+		assertEquals(0, m.size());
+
+		Iterator<SubId> i = m.iterator();
+		assertNotNull(i);
+		assertFalse(i.hasNext());
+
+		assertFalse(m.contains(id(1)));
+	}
+
+	/**
+	 * addIfAbsent returns the already-stored instance for an equal id and
+	 * does not change the size; for a new id it stores and returns the
+	 * argument.
+	 */
+	@Test
+	public void testAddIfAbsent() {
+		ObjectIdOwnerMap<SubId> m = new ObjectIdOwnerMap<SubId>();
+		m.add(id_1);
+
+		assertSame(id_1, m.addIfAbsent(new SubId(id_1)));
+		assertEquals(1, m.size());
+
+		assertSame(id_2, m.addIfAbsent(id_2));
+		assertEquals(2, m.size());
+		assertSame(id_a31, m.addIfAbsent(id_a31));
+		assertSame(id_b31, m.addIfAbsent(id_b31));
+
+		assertSame(id_a31, m.addIfAbsent(new SubId(id_a31)));
+		assertSame(id_b31, m.addIfAbsent(new SubId(id_b31)));
+		assertEquals(4, m.size());
+	}
+
+	/**
+	 * Inserting many distinct ids with add() keeps every entry reachable.
+	 * Expected size is the (n - 32) loop entries plus id_1.
+	 */
+	@Test
+	public void testAddGrowsWithObjects() {
+		int n = 16384;
+		ObjectIdOwnerMap<SubId> m = new ObjectIdOwnerMap<SubId>();
+		m.add(id_1);
+		// ids 32..n-1 never repeat id 1, so each add is a distinct entry.
+		for (int i = 32; i < n; i++)
+			m.add(new SubId(id(i)));
+		assertEquals(n - 32 + 1, m.size());
+
+		assertSame(id_1, m.get(id_1.copy()));
+		for (int i = 32; i < n; i++)
+			assertTrue(m.contains(id(i)));
+	}
+
+	/**
+	 * Same as {@link #testAddGrowsWithObjects()} but inserting through
+	 * addIfAbsent().
+	 */
+	@Test
+	public void testAddIfAbsentGrowsWithObjects() {
+		int n = 16384;
+		ObjectIdOwnerMap<SubId> m = new ObjectIdOwnerMap<SubId>();
+		m.add(id_1);
+		// ids 32..n-1 never repeat id 1, so each call inserts a new entry.
+		for (int i = 32; i < n; i++)
+			m.addIfAbsent(new SubId(id(i)));
+		assertEquals(n - 32 + 1, m.size());
+
+		assertSame(id_1, m.get(id_1.copy()));
+		for (int i = 32; i < n; i++)
+			assertTrue(m.contains(id(i)));
+	}
+
+	/**
+	 * The iterator yields id_1, id_2, id_3 in that order, throws
+	 * NoSuchElementException when exhausted, and rejects remove().
+	 */
+	@Test
+	public void testIterator() {
+		ObjectIdOwnerMap<SubId> m = new ObjectIdOwnerMap<SubId>();
+		m.add(id_1);
+		m.add(id_2);
+		m.add(id_3);
+
+		Iterator<SubId> i = m.iterator();
+		assertTrue(i.hasNext());
+		assertSame(id_1, i.next());
+		assertTrue(i.hasNext());
+		assertSame(id_2, i.next());
+		assertTrue(i.hasNext());
+		assertSame(id_3, i.next());
+
+		assertFalse(i.hasNext());
+		try {
+			i.next();
+			fail("did not fail on next with no next");
+		} catch (NoSuchElementException expected) {
+			// OK
+		}
+
+		i = m.iterator();
+		assertSame(id_1, i.next());
+		try {
+			i.remove();
+			fail("did not fail on remove");
+		} catch (UnsupportedOperationException expected) {
+			// OK
+		}
+	}
+
+	/**
+	 * Encodes {@code val} into the shared scratch buffer: the low 8 bits go
+	 * into byte 0 and bits 8..15 into byte 3.
+	 *
+	 * @param val
+	 *            value to encode; only its low 16 bits are used.
+	 * @return the shared {@link #idBuf} itself, not a copy; the next call
+	 *         overwrites it.
+	 */
+	private AnyObjectId id(int val) {
+		idBuf.setByte(0, val & 0xff);
+		idBuf.setByte(3, (val >>> 8) & 0xff);
+		return idBuf;
+	}
+
+	/** Minimal map entry type used as the element type in these tests. */
+	private static class SubId extends ObjectIdOwnerMap.Entry {
+		SubId(AnyObjectId id) {
+			super(id);
+		}
+	}
+}
-- cgit v1.2.3