Browse Source

reftable: merge-join reftables

MergedReftable combines multiple reference tables together in a stack,
allowing higher/later tables to shadow earlier/lower tables.  This
forms the basis of a transaction system, where each transaction writes
a new reftable containing only the modified references, and readers
perform a merge on the fly to get the latest value.

Change-Id: Ic2cb750141e8c61a8b2726b2eb95195acb6ddc83
tags/v4.9.0.201710071750-r
Shawn Pearce 7 years ago
parent
commit
77d8eead6d

+ 324
- 0
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/MergedReftableTest.java View File

@@ -0,0 +1,324 @@
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.internal.storage.reftable;

import static org.eclipse.jgit.lib.Constants.HEAD;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import static org.eclipse.jgit.lib.Constants.R_HEADS;
import static org.eclipse.jgit.lib.Ref.Storage.NEW;
import static org.eclipse.jgit.lib.Ref.Storage.PACKED;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.eclipse.jgit.internal.storage.io.BlockSource;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdRef;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.RefComparator;
import org.junit.Test;

/**
 * Tests for {@link MergedReftable}: scanning, seeking and object-id lookup
 * across stacks of zero, one or several in-memory reftables.
 */
public class MergedReftableTest {
	/** Number of sequentially numbered branches used by the bulk tests. */
	private static final int BULK_REF_COUNT = 567;

	@Test
	public void noTables() throws IOException {
		assertYieldsNothing(merge(new byte[0][]));
	}

	@Test
	public void oneEmptyTable() throws IOException {
		assertYieldsNothing(merge(write()));
	}

	@Test
	public void twoEmptyTables() throws IOException {
		assertYieldsNothing(merge(write(), write()));
	}

	@Test
	public void oneTableScan() throws IOException {
		List<Ref> written = numberedBranches(BULK_REF_COUNT);

		MergedReftable merged = merge(write(written));
		try (RefCursor cursor = merged.allRefs()) {
			assertYieldsInOrder(cursor, written);
		}
	}

	@Test
	public void deleteIsHidden() throws IOException {
		List<Ref> lower = Arrays.asList(
				ref("refs/heads/apple", 1),
				ref("refs/heads/master", 2));
		List<Ref> upper = Arrays.asList(delete("refs/heads/apple"));

		MergedReftable merged = merge(write(lower), write(upper));
		try (RefCursor cursor = merged.allRefs()) {
			assertOnlyRef(cursor, "refs/heads/master", id(2));
		}
	}

	@Test
	public void twoTableSeek() throws IOException {
		List<Ref> lower = Arrays.asList(
				ref("refs/heads/apple", 1),
				ref("refs/heads/master", 2));
		List<Ref> upper = Arrays.asList(ref("refs/heads/banana", 3));

		MergedReftable merged = merge(write(lower), write(upper));
		try (RefCursor cursor = merged.seekRef("refs/heads/master")) {
			assertOnlyRef(cursor, "refs/heads/master", id(2));
		}
	}

	@Test
	public void twoTableById() throws IOException {
		List<Ref> lower = Arrays.asList(
				ref("refs/heads/apple", 1),
				ref("refs/heads/master", 2));
		List<Ref> upper = Arrays.asList(ref("refs/heads/banana", 3));

		MergedReftable merged = merge(write(lower), write(upper));
		try (RefCursor cursor = merged.byObjectId(id(2))) {
			assertOnlyRef(cursor, "refs/heads/master", id(2));
		}
	}

	@Test
	public void fourTableScan() throws IOException {
		List<Ref> base = numberedBranches(BULK_REF_COUNT);

		List<Ref> delta1 = Arrays.asList(
				ref("refs/heads/next", 4),
				ref(branchName(55), 4096));
		List<Ref> delta2 = Arrays.asList(
				delete("refs/heads/next"),
				ref(branchName(55), 8192));
		List<Ref> delta3 = Arrays.asList(
				ref("refs/heads/master", 4242),
				ref(branchName(42), 5120),
				ref(branchName(98), 6120));

		// Compute the expectation with the simple map-based model first,
		// then compare it against the on-the-fly merge of the real tables.
		List<Ref> wanted = merge(base, delta1, delta2, delta3);
		MergedReftable merged = merge(
				write(base),
				write(delta1),
				write(delta2),
				write(delta3));
		try (RefCursor cursor = merged.allRefs()) {
			assertYieldsInOrder(cursor, wanted);
		}
	}

	@Test
	public void scanIncludeDeletes() throws IOException {
		List<Ref> bottom = Arrays.asList(ref("refs/heads/next", 4));
		List<Ref> middle = Arrays.asList(delete("refs/heads/next"));
		List<Ref> top = Arrays.asList(ref("refs/heads/master", 8));

		MergedReftable merged = merge(write(bottom), write(middle), write(top));
		merged.setIncludeDeletes(true);
		try (RefCursor cursor = merged.allRefs()) {
			assertTrue(cursor.next());
			Ref current = cursor.getRef();
			assertEquals("refs/heads/master", current.getName());
			assertEquals(id(8), current.getObjectId());

			// The deletion is surfaced as a NEW ref without an object id.
			assertTrue(cursor.next());
			current = cursor.getRef();
			assertEquals("refs/heads/next", current.getName());
			assertEquals(NEW, current.getStorage());
			assertNull(current.getObjectId());

			assertFalse(cursor.next());
		}
	}

	@Test
	public void oneTableSeek() throws IOException {
		List<Ref> written = numberedBranches(BULK_REF_COUNT);

		MergedReftable merged = merge(write(written));
		for (Ref want : written) {
			try (RefCursor cursor = merged.seekRef(want.getName())) {
				assertTrue("has " + want.getName(), cursor.next());
				Ref got = cursor.getRef();
				assertEquals(want.getName(), got.getName());
				assertEquals(want.getObjectId(), got.getObjectId());
				assertFalse(cursor.next());
			}
		}
	}

	/**
	 * Assert that a full scan, a seek to HEAD and a seek to the heads
	 * prefix all come back empty.
	 */
	private static void assertYieldsNothing(MergedReftable merged)
			throws IOException {
		try (RefCursor cursor = merged.allRefs()) {
			assertFalse(cursor.next());
		}
		try (RefCursor cursor = merged.seekRef(HEAD)) {
			assertFalse(cursor.next());
		}
		try (RefCursor cursor = merged.seekRef(R_HEADS)) {
			assertFalse(cursor.next());
		}
	}

	/**
	 * Assert the cursor produces exactly {@code wanted}, in order, matching
	 * on name and object id, and then ends.
	 */
	private static void assertYieldsInOrder(RefCursor cursor, List<Ref> wanted)
			throws IOException {
		for (Ref want : wanted) {
			assertTrue("has " + want.getName(), cursor.next());
			Ref got = cursor.getRef();
			assertEquals(want.getName(), got.getName());
			assertEquals(want.getObjectId(), got.getObjectId());
		}
		assertFalse(cursor.next());
	}

	/** Assert the cursor produces a single ref with the given name and id. */
	private static void assertOnlyRef(RefCursor cursor, String name,
			ObjectId objectId) throws IOException {
		assertTrue(cursor.next());
		assertEquals(name, cursor.getRef().getName());
		assertEquals(objectId, cursor.getRef().getObjectId());
		assertFalse(cursor.next());
	}

	/** Format the zero-padded branch name used by the bulk tests. */
	@SuppressWarnings("boxing")
	private static String branchName(int n) {
		return String.format("refs/heads/%03d", n);
	}

	/** Build branches numbered 1..count, each pointing at {@code id(n)}. */
	private static List<Ref> numberedBranches(int count) {
		List<Ref> out = new ArrayList<>();
		for (int n = 1; n <= count; n++) {
			out.add(ref(branchName(n), n));
		}
		return out;
	}

	/** Stack the serialized tables, index 0 at the bottom, into a merged view. */
	private static MergedReftable merge(byte[]... table) {
		List<Reftable> stack = new ArrayList<>(table.length);
		for (int k = 0; k < table.length; k++) {
			stack.add(read(table[k]));
		}
		return new MergedReftable(stack);
	}

	/** Open a reader over a serialized in-memory table. */
	private static ReftableReader read(byte[] table) {
		BlockSource src = BlockSource.from(table);
		return new ReftableReader(src);
	}

	/** Create a packed, non-tag ref pointing at {@code id(id)}. */
	private static Ref ref(String name, int id) {
		ObjectId target = id(id);
		return new ObjectIdRef.PeeledNonTag(PACKED, name, target);
	}

	/** Create the marker used for a deletion: storage NEW, no object id. */
	private static Ref delete(String name) {
		return new ObjectIdRef.Unpeeled(NEW, name, null);
	}

	/**
	 * Build an object id whose first four raw bytes hold {@code i} in
	 * little-endian order; the remaining bytes are zero.
	 */
	private static ObjectId id(int i) {
		byte[] raw = new byte[OBJECT_ID_LENGTH];
		for (int k = 0; k < 4; k++) {
			raw[k] = (byte) (i >>> (8 * k));
		}
		return ObjectId.fromRaw(raw);
	}

	/** Serialize the given refs into a single table. */
	private byte[] write(Ref... refs) throws IOException {
		return write(Arrays.asList(refs));
	}

	/** Serialize the given refs, sorted by {@link RefComparator}, into a table. */
	private byte[] write(Collection<Ref> refs) throws IOException {
		ByteArrayOutputStream out = new ByteArrayOutputStream();
		ReftableWriter tableWriter = new ReftableWriter().begin(out);
		for (Ref each : RefComparator.sort(refs)) {
			tableWriter.writeRef(each);
		}
		tableWriter.finish();
		return out.toByteArray();
	}

	/**
	 * Reference model of the merge: apply each table in order to a map,
	 * letting later tables overwrite or delete earlier entries, then return
	 * the survivors sorted by {@link RefComparator}.
	 */
	@SafeVarargs
	private static List<Ref> merge(List<Ref>... tables) {
		Map<String, Ref> latest = new HashMap<>();
		for (List<Ref> table : tables) {
			for (Ref r : table) {
				boolean isDeletion = r.getStorage() == NEW
						&& r.getObjectId() == null;
				if (isDeletion) {
					latest.remove(r.getName());
				} else {
					latest.put(r.getName(), r);
				}
			}
		}

		List<Ref> sorted = new ArrayList<>(latest.values());
		Collections.sort(sorted, RefComparator.INSTANCE);
		return sorted;
	}
}

+ 375
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/MergedReftable.java View File

@@ -0,0 +1,375 @@
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.internal.storage.reftable;

import java.io.IOException;
import java.util.List;
import java.util.PriorityQueue;

import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.ReflogEntry;

/**
 * Merges multiple reference tables together.
 * <p>
 * A {@link MergedReftable} merge-joins multiple {@link ReftableReader} on the
 * fly. Tables higher/later in the stack shadow lower/earlier tables, hiding
 * references that have been updated/replaced.
 * <p>
 * By default deleted references are skipped and not returned to the caller.
 * {@link #setIncludeDeletes(boolean)} can be used to modify this behavior if
 * the caller needs to preserve deletions during partial compaction.
 * <p>
 * A {@code MergedReftable} is not thread-safe.
 */
public class MergedReftable extends Reftable {
	private final Reftable[] tables;

	/**
	 * Initialize a merged table reader.
	 * <p>
	 * The tables in {@code tableStack} will be closed when this
	 * {@code MergedReftable} is closed.
	 *
	 * @param tableStack
	 *            stack of tables to read from. The base of the stack is at
	 *            index 0, the most recent should be at the top of the stack at
	 *            {@code tableStack.size() - 1}. The top of the stack (higher
	 *            index) shadows the base of the stack (lower index).
	 */
	public MergedReftable(List<Reftable> tableStack) {
		tables = tableStack.toArray(new Reftable[0]);

		// Tables must expose deletes to this instance to correctly
		// shadow references from lower tables.
		for (Reftable t : tables) {
			t.setIncludeDeletes(true);
		}
	}

	/**
	 * Scan every reference, merging the {@code allRefs()} cursors of all
	 * tables in the stack.
	 *
	 * @return cursor over the merged references.
	 * @throws IOException
	 *             propagated from the underlying tables.
	 */
	@Override
	public RefCursor allRefs() throws IOException {
		MergedRefCursor m = new MergedRefCursor();
		for (int i = 0; i < tables.length; i++) {
			m.add(new RefQueueEntry(tables[i].allRefs(), i));
		}
		return m;
	}

	/**
	 * Seek to a reference or to a reference subtree.
	 * <p>
	 * A {@code name} ending in {@code '/'} is handled as a prefix seek that
	 * merges every table; any other name is handled as a single-reference
	 * lookup that stops at the highest table producing a result.
	 *
	 * @param name
	 *            reference name, or prefix ending in {@code '/'}.
	 * @return cursor over the matching merged references.
	 * @throws IOException
	 *             propagated from the underlying tables.
	 */
	@Override
	public RefCursor seekRef(String name) throws IOException {
		if (name.endsWith("/")) { //$NON-NLS-1$
			return seekRefPrefix(name);
		}
		return seekSingleRef(name);
	}

	/** Merge the {@code seekRef(name)} cursors of every table in the stack. */
	private RefCursor seekRefPrefix(String name) throws IOException {
		MergedRefCursor m = new MergedRefCursor();
		for (int i = 0; i < tables.length; i++) {
			m.add(new RefQueueEntry(tables[i].seekRef(name), i));
		}
		return m;
	}

	/**
	 * Look up one reference, consulting tables from the top of the stack
	 * down and stopping at the first table that yields an entry.
	 */
	private RefCursor seekSingleRef(String name) throws IOException {
		// Walk the tables from highest priority (end of list) to lowest.
		// As soon as the reference is found (cursor not empty), all lower
		// priority tables are irrelevant as current table shadows them.
		//
		// The emptiness test must cover the cursor's head slot as well as
		// its queue: the first entry added is parked in head and leaves
		// the queue itself empty.
		MergedRefCursor m = new MergedRefCursor();
		for (int i = tables.length - 1; i >= 0 && m.isEmpty(); i--) {
			m.add(new RefQueueEntry(tables[i].seekRef(name), i));
		}
		return m;
	}

	/**
	 * Find references by object id, merging the {@code byObjectId(name)}
	 * cursors of all tables in the stack.
	 *
	 * @param name
	 *            object id to look up.
	 * @return cursor over the merged matches.
	 * @throws IOException
	 *             propagated from the underlying tables.
	 */
	@Override
	public RefCursor byObjectId(AnyObjectId name) throws IOException {
		// NOTE(review): shadowing here only happens between cursors that
		// return the same reference name. If a higher table repointed or
		// deleted a reference, its byObjectId cursor presumably does not
		// list that name for this id, so the stale entry from a lower
		// table would not be hidden. Confirm whether callers must
		// re-validate results with seekRef.
		MergedRefCursor m = new MergedRefCursor();
		for (int i = 0; i < tables.length; i++) {
			m.add(new RefQueueEntry(tables[i].byObjectId(name), i));
		}
		return m;
	}

	/**
	 * Scan every reflog entry, merging the {@code allLogs()} cursors of all
	 * tables in the stack.
	 *
	 * @return cursor over the merged log entries.
	 * @throws IOException
	 *             propagated from the underlying tables.
	 */
	@Override
	public LogCursor allLogs() throws IOException {
		MergedLogCursor m = new MergedLogCursor();
		for (int i = 0; i < tables.length; i++) {
			m.add(new LogQueueEntry(tables[i].allLogs(), i));
		}
		return m;
	}

	/**
	 * Seek the reflog, merging the {@code seekLog(refName, updateIdx)}
	 * cursors of all tables in the stack.
	 *
	 * @param refName
	 *            reference whose log is wanted.
	 * @param updateIdx
	 *            update index passed through to each table.
	 * @return cursor over the merged log entries.
	 * @throws IOException
	 *             propagated from the underlying tables.
	 */
	@Override
	public LogCursor seekLog(String refName, long updateIdx)
			throws IOException {
		MergedLogCursor m = new MergedLogCursor();
		for (int i = 0; i < tables.length; i++) {
			m.add(new LogQueueEntry(tables[i].seekLog(refName, updateIdx), i));
		}
		return m;
	}

	/**
	 * Close every table in the stack, in stack order.
	 *
	 * @throws IOException
	 *             propagated from the first table whose close fails.
	 */
	@Override
	public void close() throws IOException {
		for (Reftable t : tables) {
			t.close();
		}
	}

	/**
	 * @return initial capacity for the merge queues; never below 1 because
	 *         {@link PriorityQueue} rejects a smaller initial capacity.
	 */
	int queueSize() {
		return Math.max(1, tables.length);
	}

	/**
	 * Cursor merging several per-table reference cursors by name.
	 * <p>
	 * Pending entries live in {@link #queue}, plus at most one entry in
	 * {@link #head} which, when set, sorts before everything in the queue.
	 */
	private class MergedRefCursor extends RefCursor {
		private final PriorityQueue<RefQueueEntry> queue;
		private RefQueueEntry head;
		private Ref ref;

		MergedRefCursor() {
			queue = new PriorityQueue<>(queueSize(), RefQueueEntry::compare);
		}

		/** @return true if neither the head slot nor the queue holds an entry. */
		boolean isEmpty() {
			return head == null && queue.isEmpty();
		}

		/**
		 * Advance {@code t} and, if it has another reference, enqueue it;
		 * otherwise close its cursor.
		 */
		void add(RefQueueEntry t) throws IOException {
			// Common case is many iterations over the same RefQueueEntry
			// for the bottom of the stack (scanning all refs). It's almost
			// always less than the top of the queue. Avoid the queue's
			// O(log N) insertion and removal costs for this common case.
			if (!t.rc.next()) {
				t.rc.close();
			} else if (head == null) {
				RefQueueEntry p = queue.peek();
				if (p == null || RefQueueEntry.compare(t, p) < 0) {
					head = t;
				} else {
					head = queue.poll();
					queue.add(t);
				}
			} else if (RefQueueEntry.compare(t, head) > 0) {
				queue.add(t);
			} else {
				queue.add(head);
				head = t;
			}
		}

		@Override
		public boolean next() throws IOException {
			for (;;) {
				RefQueueEntry t = poll();
				if (t == null) {
					return false;
				}

				// Capture the result before add(t) advances t's cursor.
				ref = t.rc.getRef();
				boolean include = includeDeletes || !t.rc.wasDeleted();
				skipShadowedRefs(ref.getName());
				add(t);
				if (include) {
					return true;
				}
			}
		}

		/** Remove and return the smallest pending entry, or null if none. */
		private RefQueueEntry poll() {
			RefQueueEntry e = head;
			if (e != null) {
				head = null;
				return e;
			}
			return queue.poll();
		}

		/**
		 * Advance past every pending entry that carries the same name as
		 * the reference just taken; those come from lower tables.
		 */
		private void skipShadowedRefs(String name) throws IOException {
			for (;;) {
				RefQueueEntry t = head != null ? head : queue.peek();
				if (t != null && name.equals(t.name())) {
					add(poll());
				} else {
					break;
				}
			}
		}

		@Override
		public Ref getRef() {
			return ref;
		}

		@Override
		public void close() {
			// The head slot holds a live cursor outside of the queue; it
			// must be released too or it leaks when the caller stops early.
			if (head != null) {
				head.rc.close();
				head = null;
			}
			while (!queue.isEmpty()) {
				queue.remove().rc.close();
			}
		}
	}

	/** A table's reference cursor paired with the table's stack position. */
	private static class RefQueueEntry {
		/**
		 * Order by current reference name; for equal names the entry from
		 * the higher stack index sorts first.
		 */
		static int compare(RefQueueEntry a, RefQueueEntry b) {
			int cmp = a.name().compareTo(b.name());
			if (cmp == 0) {
				// higher index shadows lower index, so higher index first.
				cmp = Integer.compare(b.stackIdx, a.stackIdx);
			}
			return cmp;
		}

		final RefCursor rc;
		final int stackIdx;

		RefQueueEntry(RefCursor rc, int stackIdx) {
			this.rc = rc;
			this.stackIdx = stackIdx;
		}

		String name() {
			return rc.getRef().getName();
		}
	}

	/**
	 * Cursor merging several per-table log cursors by reference name and
	 * update index.
	 */
	private class MergedLogCursor extends LogCursor {
		private final PriorityQueue<LogQueueEntry> queue;
		private String refName;
		private long updateIndex;
		private ReflogEntry entry;

		MergedLogCursor() {
			queue = new PriorityQueue<>(queueSize(), LogQueueEntry::compare);
		}

		/**
		 * Advance {@code t} and, if it has another entry, enqueue it;
		 * otherwise close its cursor.
		 */
		void add(LogQueueEntry t) throws IOException {
			if (t.lc.next()) {
				queue.add(t);
			} else {
				t.lc.close();
			}
		}

		@Override
		public boolean next() throws IOException {
			for (;;) {
				LogQueueEntry t = queue.poll();
				if (t == null) {
					return false;
				}

				// Capture the result before add(t) advances t's cursor.
				refName = t.lc.getRefName();
				updateIndex = t.lc.getUpdateIndex();
				entry = t.lc.getReflogEntry();
				// A null entry is only returned when deletes are included.
				boolean include = includeDeletes || entry != null;
				skipShadowed(refName, updateIndex);
				add(t);
				if (include) {
					return true;
				}
			}
		}

		/**
		 * Advance past every pending entry with the same reference name
		 * and update index as the entry just taken.
		 */
		private void skipShadowed(String name, long index) throws IOException {
			for (;;) {
				LogQueueEntry t = queue.peek();
				if (t != null && name.equals(t.name()) && index == t.index()) {
					add(queue.remove());
				} else {
					break;
				}
			}
		}

		@Override
		public String getRefName() {
			return refName;
		}

		@Override
		public long getUpdateIndex() {
			return updateIndex;
		}

		@Override
		public ReflogEntry getReflogEntry() {
			return entry;
		}

		@Override
		public void close() {
			while (!queue.isEmpty()) {
				queue.remove().lc.close();
			}
		}
	}

	/** A table's log cursor paired with the table's stack position. */
	private static class LogQueueEntry {
		/**
		 * Order by reference name, then by update index descending, then
		 * by stack index descending.
		 */
		static int compare(LogQueueEntry a, LogQueueEntry b) {
			int cmp = a.name().compareTo(b.name());
			if (cmp == 0) {
				// higher update index sorts first.
				cmp = Long.compare(b.index(), a.index());
			}
			if (cmp == 0) {
				// higher index comes first.
				cmp = Integer.compare(b.stackIdx, a.stackIdx);
			}
			return cmp;
		}

		final LogCursor lc;
		final int stackIdx;

		LogQueueEntry(LogCursor lc, int stackIdx) {
			this.lc = lc;
			this.stackIdx = stackIdx;
		}

		String name() {
			return lc.getRefName();
		}

		long index() {
			return lc.getUpdateIndex();
		}
	}
}

Loading…
Cancel
Save