diff options
author | Shawn Pearce <spearce@spearce.org> | 2017-07-09 10:38:24 -0700 |
---|---|---|
committer | Shawn Pearce <spearce@spearce.org> | 2017-08-17 15:06:50 -0700 |
commit | 0ecc8367e610ae64ee96fd221080d9fab525aa87 (patch) | |
tree | c4b9a13c9baddaaa444d7438638a3f2ad6faf4f9 | |
parent | b9e818b556d15672ad589e1dbdf5f6f1ea557c78 (diff) | |
download | jgit-0ecc8367e610ae64ee96fd221080d9fab525aa87.tar.gz jgit-0ecc8367e610ae64ee96fd221080d9fab525aa87.zip |
reftable: create and write reftable files
This is a simple writer to create reftable formatted files. Follow-up
commits will add support for reading from reftable, debugging
utilities, and tests.
Change-Id: I3d520c3515c580144490b0b45433ea175a3e6e11
9 files changed, 2010 insertions, 0 deletions
diff --git a/org.eclipse.jgit/META-INF/MANIFEST.MF b/org.eclipse.jgit/META-INF/MANIFEST.MF index 4719ceb0e5..bbc500fe35 100644 --- a/org.eclipse.jgit/META-INF/MANIFEST.MF +++ b/org.eclipse.jgit/META-INF/MANIFEST.MF @@ -75,6 +75,7 @@ Export-Package: org.eclipse.jgit.annotations;version="4.9.0", org.eclipse.jgit.pgm, org.eclipse.jgit.pgm.test", org.eclipse.jgit.internal.storage.pack;version="4.9.0";x-friends:="org.eclipse.jgit.junit,org.eclipse.jgit.test,org.eclipse.jgit.pgm", + org.eclipse.jgit.internal.storage.reftable;version="4.9.0";x-friends:="org.eclipse.jgit.junit,org.eclipse.jgit.test,org.eclipse.jgit.pgm", org.eclipse.jgit.internal.storage.reftree;version="4.9.0";x-friends:="org.eclipse.jgit.junit,org.eclipse.jgit.test,org.eclipse.jgit.pgm", org.eclipse.jgit.lib;version="4.9.0"; uses:="org.eclipse.jgit.revwalk, diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties index 22e60cbb54..b91b18083a 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties @@ -475,6 +475,7 @@ openFilesMustBeAtLeast1=Open files must be >= 1 openingConnection=Opening connection operationCanceled=Operation {0} was canceled outputHasAlreadyBeenStarted=Output has already been started. +overflowedReftableBlock=Overflowed reftable block packChecksumMismatch=Pack checksum mismatch detected for pack file {0} packCorruptedWhileWritingToFilesystem=Pack corrupted while writing to filesystem packDoesNotMatchIndex=Pack {0} does not match index @@ -502,6 +503,7 @@ patchFormatException=Format error: {0} pathIsNotInWorkingDir=Path is not in working dir pathNotConfigured=Submodule path is not configured peeledLineBeforeRef=Peeled line before ref. +peeledRefIsRequired=Peeled ref is required. peerDidNotSupplyACompleteObjectGraph=peer did not supply a complete object graph personIdentEmailNonNull=E-mail address of PersonIdent must not be null. personIdentNameNonNull=Name of PersonIdent must not be null. @@ -663,6 +665,7 @@ unableToCreateNewObject=Unable to create new object: {0} unableToStore=Unable to store {0}. unableToWrite=Unable to write {0} unauthorized=Unauthorized +underflowedReftableBlock=Underflowed reftable block unencodeableFile=Unencodable file: {0} unexpectedCompareResult=Unexpected metadata comparison result: {0} unexpectedEndOfConfigFile=Unexpected end of config file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java index 610b99c146..e4d7e7f6e2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java @@ -534,6 +534,7 @@ public class JGitText extends TranslationBundle { /***/ public String openingConnection; /***/ public String operationCanceled; /***/ public String outputHasAlreadyBeenStarted; + /***/ public String overflowedReftableBlock; /***/ public String packChecksumMismatch; /***/ public String packCorruptedWhileWritingToFilesystem; /***/ public String packDoesNotMatchIndex; @@ -561,6 +562,7 @@ public class JGitText extends TranslationBundle { /***/ public String pathIsNotInWorkingDir; /***/ public String pathNotConfigured; /***/ public String peeledLineBeforeRef; + /***/ public String peeledRefIsRequired; /***/ public String peerDidNotSupplyACompleteObjectGraph; /***/ public String personIdentEmailNonNull; /***/ public String personIdentNameNonNull; @@ -722,6 +724,7 @@ public class JGitText extends TranslationBundle { /***/ public String unableToStore; /***/ public String unableToWrite; /***/ public String unauthorized; + /***/ public String underflowedReftableBlock; /***/ public String unencodeableFile; /***/ public String unexpectedCompareResult; /***/ public String unexpectedEndOfConfigFile; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockSizeTooSmallException.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockSizeTooSmallException.java new file mode 100644 index 0000000000..cb0f988b23 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockSizeTooSmallException.java @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.reftable; + +import java.io.IOException; + +/** Thrown if {@link ReftableWriter} cannot fit a reference. */ +public class BlockSizeTooSmallException extends IOException { + private static final long serialVersionUID = 1L; + + private final int minBlockSize; + + BlockSizeTooSmallException(int b) { + minBlockSize = b; + } + + /** @return minimum block size in bytes reftable requires to write a ref. */ + public int getMinimumBlockSize() { + return minBlockSize; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockWriter.java new file mode 100644 index 0000000000..8f3e889de6 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockWriter.java @@ -0,0 +1,601 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.reftable; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_HEADER_LEN; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.INDEX_BLOCK_TYPE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.LOG_BLOCK_TYPE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.LOG_DATA; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.LOG_NONE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.MAX_RESTARTS; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.OBJ_BLOCK_TYPE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.REF_BLOCK_TYPE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.VALUE_1ID; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.VALUE_2ID; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.VALUE_NONE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.VALUE_SYMREF; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.VALUE_TYPE_MASK; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.reverseUpdateIndex; +import static org.eclipse.jgit.internal.storage.reftable.ReftableOutputStream.computeVarintSize; +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; +import static org.eclipse.jgit.lib.Ref.Storage.NEW; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.PersonIdent; +import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.util.IntList; +import org.eclipse.jgit.util.LongList; +import org.eclipse.jgit.util.NB; + +/** Formats and writes blocks for {@link ReftableWriter}. */ +class BlockWriter { + private final byte blockType; + private final byte keyType; + private final List<Entry> entries; + private final int blockLimitBytes; + private final int restartInterval; + + private int entriesSumBytes; + private int restartCnt; + + BlockWriter(byte type, byte kt, int bs, int ri) { + blockType = type; + keyType = kt; + blockLimitBytes = bs; + restartInterval = ri; + entries = new ArrayList<>(estimateEntryCount(type, kt, bs)); + } + + private static int estimateEntryCount(byte blockType, byte keyType, + int blockLimitBytes) { + double avgBytesPerEntry; + switch (blockType) { + case REF_BLOCK_TYPE: + default: + avgBytesPerEntry = 35.31; + break; + + case OBJ_BLOCK_TYPE: + avgBytesPerEntry = 4.19; + break; + + case LOG_BLOCK_TYPE: + avgBytesPerEntry = 101.14; + break; + + case INDEX_BLOCK_TYPE: + switch (keyType) { + case REF_BLOCK_TYPE: + case LOG_BLOCK_TYPE: + default: + avgBytesPerEntry = 27.44; + break; + + case OBJ_BLOCK_TYPE: + avgBytesPerEntry = 11.57; + break; + } + } + + int cnt = (int) (Math.ceil(blockLimitBytes / avgBytesPerEntry)); + return Math.min(cnt, 4096); + } + + byte blockType() { + return blockType; + } + + boolean padBetweenBlocks() { + return padBetweenBlocks(blockType) + || (blockType == INDEX_BLOCK_TYPE && padBetweenBlocks(keyType)); + } + + static boolean padBetweenBlocks(byte type) { + return type == REF_BLOCK_TYPE || type == OBJ_BLOCK_TYPE; + } + + byte[] lastKey() { + return entries.get(entries.size() - 1).key; + } + + int currentSize() { + return computeBlockBytes(0, false); + } + + void mustAdd(Entry entry) throws BlockSizeTooSmallException { + if (!tryAdd(entry, true)) { + // Insanely long names need a larger block size. + throw blockSizeTooSmall(entry); + } + } + + boolean tryAdd(Entry entry) { + if (entry instanceof ObjEntry + && computeBlockBytes(entry.sizeBytes(), 1) > blockLimitBytes) { + // If the ObjEntry has so many ref block pointers that its + // encoding overflows any block, reconfigure it to tell readers to + // instead scan all refs for this ObjectId. That significantly + // shrinks the entry to a very small size, which may now fit into + // this block. + ((ObjEntry) entry).markScanRequired(); + } + + if (tryAdd(entry, true)) { + return true; + } else if (nextShouldBeRestart()) { + // It was time for another restart, but the entry doesn't fit + // with its complete key, as the block is nearly full. Try to + // force it to fit with prefix compression rather than waste + // the tail of the block with padding. + return tryAdd(entry, false); + } + return false; + } + + private boolean tryAdd(Entry entry, boolean tryRestart) { + byte[] key = entry.key; + int prefixLen = 0; + boolean restart = tryRestart && nextShouldBeRestart(); + if (!restart) { + Entry priorEntry = entries.get(entries.size() - 1); + byte[] prior = priorEntry.key; + prefixLen = commonPrefix(prior, prior.length, key); + if (prefixLen <= 5 /* "refs/" */ && keyType == REF_BLOCK_TYPE) { + // Force restart points at transitions between namespaces + // such as "refs/heads/" to "refs/tags/". + restart = true; + prefixLen = 0; + } else if (prefixLen == 0) { + restart = true; + } + } + + entry.restart = restart; + entry.prefixLen = prefixLen; + int entryBytes = entry.sizeBytes(); + if (computeBlockBytes(entryBytes, restart) > blockLimitBytes) { + return false; + } + + entriesSumBytes += entryBytes; + entries.add(entry); + if (restart) { + restartCnt++; + } + return true; + } + + private boolean nextShouldBeRestart() { + int cnt = entries.size(); + return (cnt == 0 || ((cnt + 1) % restartInterval) == 0) + && restartCnt < MAX_RESTARTS; + } + + private int computeBlockBytes(int entryBytes, boolean restart) { + return computeBlockBytes( + entriesSumBytes + entryBytes, + restartCnt + (restart ? 1 : 0)); + } + + private static int computeBlockBytes(int entryBytes, int restartCnt) { + return 4 // 4-byte block header + + entryBytes + + restartCnt * 3 // restart_offset + + 2; // 2-byte restart_count + } + + void writeTo(ReftableOutputStream os) throws IOException { + os.beginBlock(blockType); + IntList restarts = new IntList(restartCnt); + for (Entry entry : entries) { + if (entry.restart) { + restarts.add(os.bytesWrittenInBlock()); + } + entry.writeKey(os); + entry.writeValue(os); + } + if (restarts.size() == 0 || restarts.size() > MAX_RESTARTS) { + throw new IllegalStateException(); + } + for (int i = 0; i < restarts.size(); i++) { + os.writeInt24(restarts.get(i)); + } + os.writeInt16(restarts.size()); + os.flushBlock(); + } + + private BlockSizeTooSmallException blockSizeTooSmall(Entry entry) { + // Compute size required to fit this entry by itself. + int min = FILE_HEADER_LEN + computeBlockBytes(entry.sizeBytes(), 1); + return new BlockSizeTooSmallException(min); + } + + static int commonPrefix(byte[] a, int n, byte[] b) { + int len = Math.min(n, Math.min(a.length, b.length)); + for (int i = 0; i < len; i++) { + if (a[i] != b[i]) { + return i; + } + } + return len; + } + + static int encodeSuffixAndType(int sfx, int valueType) { + return (sfx << 3) | valueType; + } + + static int compare( + byte[] a, int ai, int aLen, + byte[] b, int bi, int bLen) { + int aEnd = ai + aLen; + int bEnd = bi + bLen; + while (ai < aEnd && bi < bEnd) { + int c = (a[ai++] & 0xff) - (b[bi++] & 0xff); + if (c != 0) { + return c; + } + } + return aLen - bLen; + } + + static abstract class Entry { + static int compare(Entry ea, Entry eb) { + byte[] a = ea.key; + byte[] b = eb.key; + return BlockWriter.compare(a, 0, a.length, b, 0, b.length); + } + + final byte[] key; + int prefixLen; + boolean restart; + + Entry(byte[] key) { + this.key = key; + } + + void writeKey(ReftableOutputStream os) { + int sfxLen = key.length - prefixLen; + os.writeVarint(prefixLen); + os.writeVarint(encodeSuffixAndType(sfxLen, valueType())); + os.write(key, prefixLen, sfxLen); + } + + int sizeBytes() { + int sfxLen = key.length - prefixLen; + int sfx = encodeSuffixAndType(sfxLen, valueType()); + return computeVarintSize(prefixLen) + + computeVarintSize(sfx) + + sfxLen + + valueSize(); + } + + abstract byte blockType(); + abstract int valueType(); + abstract int valueSize(); + abstract void writeValue(ReftableOutputStream os) throws IOException; + } + + static class IndexEntry extends Entry { + private final long blockPosition; + + IndexEntry(byte[] key, long blockPosition) { + super(key); + this.blockPosition = blockPosition; + } + + @Override + byte blockType() { + return INDEX_BLOCK_TYPE; + } + + @Override + int valueType() { + return 0; + } + + @Override + int valueSize() { + return computeVarintSize(blockPosition); + } + + @Override + void writeValue(ReftableOutputStream os) { + os.writeVarint(blockPosition); + } + } + + static class RefEntry extends Entry { + final Ref ref; + + RefEntry(Ref ref) { + super(nameUtf8(ref)); + this.ref = ref; + } + + @Override + byte blockType() { + return REF_BLOCK_TYPE; + } + + @Override + int valueType() { + if (ref.isSymbolic()) { + return VALUE_SYMREF; + } else if (ref.getStorage() == NEW && ref.getObjectId() == null) { + return VALUE_NONE; + } else if (ref.getPeeledObjectId() != null) { + return VALUE_2ID; + } else { + return VALUE_1ID; + } + } + + @Override + int valueSize() { + switch (valueType()) { + case VALUE_NONE: + return 0; + case VALUE_1ID: + return OBJECT_ID_LENGTH; + case VALUE_2ID: + return 2 * OBJECT_ID_LENGTH; + case VALUE_SYMREF: + if (ref.isSymbolic()) { + int nameLen = nameUtf8(ref.getTarget()).length; + return computeVarintSize(nameLen) + nameLen; + } + } + throw new IllegalStateException(); + } + + @Override + void writeValue(ReftableOutputStream os) throws IOException { + switch (valueType()) { + case VALUE_NONE: + return; + + case VALUE_1ID: { + ObjectId id1 = ref.getObjectId(); + if (!ref.isPeeled()) { + throw new IOException(JGitText.get().peeledRefIsRequired); + } else if (id1 == null) { + throw new IOException(JGitText.get().invalidId0); + } + os.writeId(id1); + return; + } + + case VALUE_2ID: { + ObjectId id1 = ref.getObjectId(); + ObjectId id2 = ref.getPeeledObjectId(); + if (!ref.isPeeled()) { + throw new IOException(JGitText.get().peeledRefIsRequired); + } else if (id1 == null || id2 == null) { + throw new IOException(JGitText.get().invalidId0); + } + os.writeId(id1); + os.writeId(id2); + return; + } + + case VALUE_SYMREF: + if (ref.isSymbolic()) { + os.writeVarintString(ref.getTarget().getName()); + return; + } + } + throw new IllegalStateException(); + } + + private static byte[] nameUtf8(Ref ref) { + return ref.getName().getBytes(UTF_8); + } + } + + static class ObjEntry extends Entry { + final LongList blockPos; + + ObjEntry(int idLen, ObjectId id, LongList blockPos) { + super(key(idLen, id)); + this.blockPos = blockPos; + } + + private static byte[] key(int idLen, ObjectId id) { + byte[] key = new byte[OBJECT_ID_LENGTH]; + id.copyRawTo(key, 0); + if (idLen < OBJECT_ID_LENGTH) { + return Arrays.copyOf(key, idLen); + } + return key; + } + + void markScanRequired() { + blockPos.clear(); + } + + @Override + byte blockType() { + return OBJ_BLOCK_TYPE; + } + + @Override + int valueType() { + int cnt = blockPos.size(); + return cnt != 0 && cnt <= VALUE_TYPE_MASK ? cnt : 0; + } + + @Override + int valueSize() { + int cnt = blockPos.size(); + if (cnt == 0) { + return computeVarintSize(0); + } + + int n = 0; + if (cnt > VALUE_TYPE_MASK) { + n += computeVarintSize(cnt); + } + n += computeVarintSize(blockPos.get(0)); + for (int j = 1; j < cnt; j++) { + long prior = blockPos.get(j - 1); + long b = blockPos.get(j); + n += computeVarintSize(b - prior); + } + return n; + } + + @Override + void writeValue(ReftableOutputStream os) throws IOException { + int cnt = blockPos.size(); + if (cnt == 0) { + os.writeVarint(0); + return; + } + + if (cnt > VALUE_TYPE_MASK) { + os.writeVarint(cnt); + } + os.writeVarint(blockPos.get(0)); + for (int j = 1; j < cnt; j++) { + long prior = blockPos.get(j - 1); + long b = blockPos.get(j); + os.writeVarint(b - prior); + } + } + } + + static class DeleteLogEntry extends Entry { + DeleteLogEntry(String refName, long updateIndex) { + super(LogEntry.key(refName, updateIndex)); + } + + @Override + byte blockType() { + return LOG_BLOCK_TYPE; + } + + @Override + int valueType() { + return LOG_NONE; + } + + @Override + int valueSize() { + return 0; + } + + @Override + void writeValue(ReftableOutputStream os) { + // Nothing in a delete log record. + } + } + + static class LogEntry extends Entry { + final ObjectId oldId; + final ObjectId newId; + final long timeSecs; + final short tz; + final byte[] name; + final byte[] email; + final byte[] msg; + + LogEntry(String refName, long updateIndex, PersonIdent who, + ObjectId oldId, ObjectId newId, String message) { + super(key(refName, updateIndex)); + + this.oldId = oldId; + this.newId = newId; + this.timeSecs = who.getWhen().getTime() / 1000L; + this.tz = (short) who.getTimeZoneOffset(); + this.name = who.getName().getBytes(UTF_8); + this.email = who.getEmailAddress().getBytes(UTF_8); + this.msg = message.getBytes(UTF_8); + } + + static byte[] key(String ref, long index) { + byte[] name = ref.getBytes(UTF_8); + byte[] key = Arrays.copyOf(name, name.length + 1 + 8); + NB.encodeInt64(key, key.length - 8, reverseUpdateIndex(index)); + return key; + } + + @Override + byte blockType() { + return LOG_BLOCK_TYPE; + } + + @Override + int valueType() { + return LOG_DATA; + } + + @Override + int valueSize() { + return 2 * OBJECT_ID_LENGTH + + computeVarintSize(name.length) + name.length + + computeVarintSize(email.length) + email.length + + computeVarintSize(timeSecs) + + 2 // tz + + computeVarintSize(msg.length) + msg.length; + } + + @Override + void writeValue(ReftableOutputStream os) { + os.writeId(oldId); + os.writeId(newId); + os.writeVarintString(name); + os.writeVarintString(email); + os.writeVarint(timeSecs); + os.writeInt16(tz); + os.writeVarintString(msg); + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableConfig.java new file mode 100644 index 0000000000..f7a1fbe2af --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableConfig.java @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.reftable; + +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.MAX_BLOCK_SIZE; + +import org.eclipse.jgit.lib.Config; +import org.eclipse.jgit.lib.Repository; + +/** Configuration used by a reftable writer when constructing the stream. */ +public class ReftableConfig { + private int refBlockSize = 4 << 10; + private int logBlockSize; + private int restartInterval; + private int maxIndexLevels; + private boolean alignBlocks = true; + private boolean indexObjects = true; + + /** Create a default configuration. */ + public ReftableConfig() { + } + + /** + * Create a configuration honoring the repository's settings. + * + * @param db + * the repository to read settings from. The repository is not + * retained by the new configuration, instead its settings are + * copied during the constructor. + */ + public ReftableConfig(Repository db) { + fromConfig(db.getConfig()); + } + + /** + * Create a configuration honoring settings in a {@link Config}. + * + * @param cfg + * the source to read settings from. The source is not retained + * by the new configuration, instead its settings are copied + * during the constructor. + */ + public ReftableConfig(Config cfg) { + fromConfig(cfg); + } + + /** + * Copy an existing configuration to a new instance. + * + * @param cfg + * the source configuration to copy from. + */ + public ReftableConfig(ReftableConfig cfg) { + this.refBlockSize = cfg.refBlockSize; + this.logBlockSize = cfg.logBlockSize; + this.restartInterval = cfg.restartInterval; + this.maxIndexLevels = cfg.maxIndexLevels; + this.alignBlocks = cfg.alignBlocks; + this.indexObjects = cfg.indexObjects; + } + + /** @return desired output block size for references, in bytes */ + public int getRefBlockSize() { + return refBlockSize; + } + + /** + * @param szBytes + * desired output block size for references, in bytes. + */ + public void setRefBlockSize(int szBytes) { + if (szBytes > MAX_BLOCK_SIZE) { + throw new IllegalArgumentException(); + } + refBlockSize = Math.max(0, szBytes); + } + + /** + * @return desired output block size for log entries, in bytes. If 0 the + * writer will default to {@code 2 * getRefBlockSize()}. + */ + public int getLogBlockSize() { + return logBlockSize; + } + + /** + * @param szBytes + * desired output block size for log entries, in bytes. If 0 will + * default to {@code 2 * getRefBlockSize()}. + */ + public void setLogBlockSize(int szBytes) { + if (szBytes > MAX_BLOCK_SIZE) { + throw new IllegalArgumentException(); + } + logBlockSize = Math.max(0, szBytes); + } + + /** @return number of references between binary search markers. */ + public int getRestartInterval() { + return restartInterval; + } + + /** + * @param interval + * number of references between binary search markers. If + * {@code interval} is 0 (default), the writer will select a + * default value based on the block size. + */ + public void setRestartInterval(int interval) { + restartInterval = Math.max(0, interval); + } + + /** @return maximum depth of the index; 0 for unlimited. */ + public int getMaxIndexLevels() { + return maxIndexLevels; + } + + /** + * @param levels + * maximum number of levels to use in indexes. Lower levels of + * the index respect {@link #getRefBlockSize()}, and the highest + * level may exceed that if the number of levels is limited. + */ + public void setMaxIndexLevels(int levels) { + maxIndexLevels = Math.max(0, levels); + } + + /** @return {@code true} if the writer should align blocks. */ + public boolean isAlignBlocks() { + return alignBlocks; + } + + /** + * @param align + * if {@code true} blocks are written aligned to multiples of + * {@link #getRefBlockSize()}. May increase file size due to NUL + * padding bytes added between blocks. Default is {@code true}. + */ + public void setAlignBlocks(boolean align) { + alignBlocks = align; + } + + /** @return {@code true} if the writer should index object to ref. */ + public boolean isIndexObjects() { + return indexObjects; + } + + /** + * @param index + * if {@code true} the reftable may include additional storage to + * efficiently map from {@code ObjectId} to reference names. By + * default, {@code true}. + */ + public void setIndexObjects(boolean index) { + indexObjects = index; + } + + /** + * Update properties by setting fields from the configuration. + * + * If a property's corresponding variable is not defined in the supplied + * configuration, then it is left unmodified. + * + * @param rc + * configuration to read properties from. + */ + public void fromConfig(Config rc) { + refBlockSize = rc.getInt("reftable", "blockSize", refBlockSize); //$NON-NLS-1$ //$NON-NLS-2$ + logBlockSize = rc.getInt("reftable", "logBlockSize", logBlockSize); //$NON-NLS-1$ //$NON-NLS-2$ + restartInterval = rc.getInt("reftable", "restartInterval", restartInterval); //$NON-NLS-1$ //$NON-NLS-2$ + maxIndexLevels = rc.getInt("reftable", "indexLevels", maxIndexLevels); //$NON-NLS-1$ //$NON-NLS-2$ + alignBlocks = rc.getBoolean("reftable", "alignBlocks", alignBlocks); //$NON-NLS-1$ //$NON-NLS-2$ + indexObjects = rc.getBoolean("reftable", "indexObjects", indexObjects); //$NON-NLS-1$ //$NON-NLS-2$ + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableConstants.java new file mode 100644 index 0000000000..0b89327582 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableConstants.java @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.reftable; + +class ReftableConstants { + static final byte[] FILE_HEADER_MAGIC = { 'R', 'E', 'F', 'T' }; + static final byte VERSION_1 = (byte) 1; + + static final int FILE_HEADER_LEN = 24; + static final int FILE_FOOTER_LEN = 68; + + static final byte FILE_BLOCK_TYPE = 'R'; + static final byte REF_BLOCK_TYPE = 'r'; + static final byte OBJ_BLOCK_TYPE = 'o'; + static final byte LOG_BLOCK_TYPE = 'g'; + static final byte INDEX_BLOCK_TYPE = 'i'; + + static final int VALUE_NONE = 0x0; + static final int VALUE_1ID = 0x1; + static final int VALUE_2ID = 0x2; + static final int VALUE_SYMREF = 0x3; + static final int VALUE_TYPE_MASK = 0x7; + + static final int LOG_NONE = 0x0; + static final int LOG_DATA = 0x1; + + static final int MAX_BLOCK_SIZE = (1 << 24) - 1; + static final int MAX_RESTARTS = 65535; + + static boolean isFileHeaderMagic(byte[] buf, int o, int n) { + return (n - o) >= FILE_HEADER_MAGIC.length + && buf[o + 0] == FILE_HEADER_MAGIC[0] + && buf[o + 1] == FILE_HEADER_MAGIC[1] + && buf[o + 2] == FILE_HEADER_MAGIC[2] + && buf[o + 3] == FILE_HEADER_MAGIC[3]; + } + + static long reverseUpdateIndex(long time) { + return 0xffffffffffffffffL - time; + } + + private ReftableConstants() { + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableOutputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableOutputStream.java new file mode 100644 index 0000000000..a24619b2c5 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableOutputStream.java @@ -0,0 +1,247 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.reftable; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_HEADER_LEN; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.INDEX_BLOCK_TYPE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.LOG_BLOCK_TYPE; +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; + +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.util.NB; +import org.eclipse.jgit.util.io.CountingOutputStream; + +/** + * Wrapper to assist formatting a reftable to an {@link OutputStream}. + * <p> + * Internally buffers at block size boundaries, flushing only complete blocks to + * the {@code OutputStream}. + */ +class ReftableOutputStream extends OutputStream { + private final byte[] tmp = new byte[10]; + private final CountingOutputStream out; + private final boolean alignBlocks; + + private Deflater deflater; + private DeflaterOutputStream compressor; + + private int blockType; + private int blockSize; + private int blockStart; + private byte[] blockBuf; + private int cur; + private long paddingUsed; + + ReftableOutputStream(OutputStream os, int bs, boolean align) { + blockSize = bs; + blockBuf = new byte[bs]; + alignBlocks = align; + out = new CountingOutputStream(os); + } + + void setBlockSize(int bs) { + blockSize = bs; + } + + @Override + public void write(int b) { + ensureBytesAvailableInBlockBuf(1); + blockBuf[cur++] = (byte) b; + } + + @Override + public void write(byte[] b, int off, int cnt) { + ensureBytesAvailableInBlockBuf(cnt); + System.arraycopy(b, off, blockBuf, cur, cnt); + cur += cnt; + } + + int bytesWrittenInBlock() { + return cur; + } + + int bytesAvailableInBlock() { + return blockSize - cur; + } + + long paddingUsed() { + return paddingUsed; + } + + /** @return bytes flushed; excludes {@link #bytesWrittenInBlock()}. */ + long size() { + return out.getCount(); + } + + static int computeVarintSize(long val) { + int n = 1; + for (; (val >>>= 7) != 0; n++) { + val--; + } + return n; + } + + void writeVarint(long val) { + int n = tmp.length; + tmp[--n] = (byte) (val & 0x7f); + while ((val >>>= 7) != 0) { + tmp[--n] = (byte) (0x80 | (--val & 0x7F)); + } + write(tmp, n, tmp.length - n); + } + + void writeInt16(int val) { + ensureBytesAvailableInBlockBuf(2); + NB.encodeInt16(blockBuf, cur, val); + cur += 2; + } + + void writeInt24(int val) { + ensureBytesAvailableInBlockBuf(3); + NB.encodeInt24(blockBuf, cur, val); + cur += 3; + } + + void writeId(ObjectId id) { + ensureBytesAvailableInBlockBuf(OBJECT_ID_LENGTH); + id.copyRawTo(blockBuf, cur); + cur += OBJECT_ID_LENGTH; + } + + void writeVarintString(String s) { + writeVarintString(s.getBytes(UTF_8)); + } + + void writeVarintString(byte[] msg) { + writeVarint(msg.length); + write(msg, 0, msg.length); + } + + private void ensureBytesAvailableInBlockBuf(int cnt) { + if (cur + cnt > blockBuf.length) { + int n = Math.max(cur + cnt, blockBuf.length * 2); + blockBuf = Arrays.copyOf(blockBuf, n); + } + } + + void flushFileHeader() throws IOException { + if (cur == FILE_HEADER_LEN && out.getCount() == 0) { + out.write(blockBuf, 0, cur); + cur = 0; + } + } + + void beginBlock(byte type) { + blockType = type; + blockStart = cur; + cur += 4; // reserve space for 4-byte block header. + } + + void flushBlock() throws IOException { + if (cur > blockSize && blockType != INDEX_BLOCK_TYPE) { + throw new IOException(JGitText.get().overflowedReftableBlock); + } + NB.encodeInt32(blockBuf, blockStart, (blockType << 24) | cur); + + if (blockType == LOG_BLOCK_TYPE) { + // Log blocks are deflated after the block header. + out.write(blockBuf, 0, 4); + if (deflater != null) { + deflater.reset(); + } else { + deflater = new Deflater(Deflater.BEST_COMPRESSION); + compressor = new DeflaterOutputStream(out, deflater); + } + compressor.write(blockBuf, 4, cur - 4); + compressor.finish(); + } else { + // Other blocks are uncompressed. + out.write(blockBuf, 0, cur); + } + + cur = 0; + blockType = 0; + blockStart = 0; + } + + void padBetweenBlocksToNextBlock() throws IOException { + if (alignBlocks) { + long m = size() % blockSize; + if (m > 0) { + int pad = blockSize - (int) m; + ensureBytesAvailableInBlockBuf(pad); + Arrays.fill(blockBuf, 0, pad, (byte) 0); + out.write(blockBuf, 0, pad); + paddingUsed += pad; + } + } + } + + int estimatePadBetweenBlocks(int currentBlockSize) { + if (alignBlocks) { + long m = (size() + currentBlockSize) % blockSize; + return m > 0 ? blockSize - (int) m : 0; + } + return 0; + } + + void finishFile() throws IOException { + // File footer doesn't need patching for the block start. + // Just flush what has been buffered. + out.write(blockBuf, 0, cur); + cur = 0; + + if (deflater != null) { + deflater.end(); + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableWriter.java new file mode 100644 index 0000000000..45b759f952 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableWriter.java @@ -0,0 +1,792 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.reftable; + +import static java.lang.Math.log; +import static org.eclipse.jgit.internal.storage.reftable.BlockWriter.padBetweenBlocks; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_FOOTER_LEN; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_HEADER_LEN; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_HEADER_MAGIC; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.INDEX_BLOCK_TYPE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.LOG_BLOCK_TYPE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.MAX_BLOCK_SIZE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.MAX_RESTARTS; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.OBJ_BLOCK_TYPE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.REF_BLOCK_TYPE; +import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.VERSION_1; +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.zip.CRC32; + +import org.eclipse.jgit.annotations.Nullable; +import org.eclipse.jgit.internal.storage.reftable.BlockWriter.DeleteLogEntry; +import org.eclipse.jgit.internal.storage.reftable.BlockWriter.Entry; +import org.eclipse.jgit.internal.storage.reftable.BlockWriter.IndexEntry; +import org.eclipse.jgit.internal.storage.reftable.BlockWriter.LogEntry; +import org.eclipse.jgit.internal.storage.reftable.BlockWriter.ObjEntry; +import org.eclipse.jgit.internal.storage.reftable.BlockWriter.RefEntry; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectIdOwnerMap; +import org.eclipse.jgit.lib.ObjectIdSubclassMap; +import org.eclipse.jgit.lib.PersonIdent; +import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.util.LongList; +import org.eclipse.jgit.util.NB; + +/** + * Writes a reftable formatted file. + * <p> + * A reftable can be written in a streaming fashion, provided the caller sorts + * all references. A {@link ReftableWriter} is single-use, and not thread-safe. + */ +public class ReftableWriter { + private ReftableConfig config; + private int refBlockSize; + private int logBlockSize; + private int restartInterval; + private int maxIndexLevels; + private boolean alignBlocks; + private boolean indexObjects; + + private long minUpdateIndex; + private long maxUpdateIndex; + + private ReftableOutputStream out; + private ObjectIdSubclassMap<RefList> obj2ref; + + private BlockWriter cur; + private Section refs; + private Section objs; + private Section logs; + private int objIdLen; + private Stats stats; + + /** Initialize a writer with a default configuration. */ + public ReftableWriter() { + this(new ReftableConfig()); + } + + /** + * Initialize a writer with a specific configuration. + * + * @param cfg + * configuration for the writer. + */ + public ReftableWriter(ReftableConfig cfg) { + config = cfg; + } + + /** + * @param cfg + * configuration for the writer. + * @return {@code this} + */ + public ReftableWriter setConfig(ReftableConfig cfg) { + this.config = cfg != null ? cfg : new ReftableConfig(); + return this; + } + + /** + * @param min + * the minimum update index for log entries that appear in this + * reftable. This should be 1 higher than the prior reftable's + * {@code maxUpdateIndex} if this table will be used in a stack. + * @return {@code this} + */ + public ReftableWriter setMinUpdateIndex(long min) { + minUpdateIndex = min; + return this; + } + + /** + * @param max + * the maximum update index for log entries that appear in this + * reftable. This should be at least 1 higher than the prior + * reftable's {@code maxUpdateIndex} if this table will be used + * in a stack. + * @return {@code this} + */ + public ReftableWriter setMaxUpdateIndex(long max) { + maxUpdateIndex = max; + return this; + } + + /** + * Begin writing the reftable. + * + * @param os + * stream to write the table to. Caller is responsible for + * closing the stream after invoking {@link #finish()}. + * @return {@code this} + * @throws IOException + * if reftable header cannot be written. + */ + public ReftableWriter begin(OutputStream os) throws IOException { + refBlockSize = config.getRefBlockSize(); + logBlockSize = config.getLogBlockSize(); + restartInterval = config.getRestartInterval(); + maxIndexLevels = config.getMaxIndexLevels(); + alignBlocks = config.isAlignBlocks(); + indexObjects = config.isIndexObjects(); + + if (refBlockSize <= 0) { + refBlockSize = 4 << 10; + } else if (refBlockSize > MAX_BLOCK_SIZE) { + throw new IllegalArgumentException(); + } + if (logBlockSize <= 0) { + logBlockSize = 2 * refBlockSize; + } + if (restartInterval <= 0) { + restartInterval = refBlockSize < (60 << 10) ? 16 : 64; + } + + out = new ReftableOutputStream(os, refBlockSize, alignBlocks); + refs = new Section(REF_BLOCK_TYPE); + if (indexObjects) { + obj2ref = new ObjectIdSubclassMap<>(); + } + writeFileHeader(); + return this; + } + + /** + * Sort a collection of references and write them to the reftable. + * + * @param refsToPack + * references to sort and write. + * @return {@code this} + * @throws IOException + * if reftable cannot be written. + */ + public ReftableWriter sortAndWriteRefs(Collection<Ref> refsToPack) + throws IOException { + Iterator<RefEntry> itr = refsToPack.stream() + .map(RefEntry::new) + .sorted(Entry::compare) + .iterator(); + while (itr.hasNext()) { + RefEntry entry = itr.next(); + long blockPos = refs.write(entry); + indexRef(entry.ref, blockPos); + } + return this; + } + + /** + * Write one reference to the reftable. + * <p> + * References must be passed in sorted order. + * + * @param ref + * the reference to store. + * @throws IOException + * if reftable cannot be written. + */ + public void writeRef(Ref ref) throws IOException { + long blockPos = refs.write(new RefEntry(ref)); + indexRef(ref, blockPos); + } + + private void indexRef(Ref ref, long blockPos) { + if (indexObjects && !ref.isSymbolic()) { + indexId(ref.getObjectId(), blockPos); + indexId(ref.getPeeledObjectId(), blockPos); + } + } + + private void indexId(ObjectId id, long blockPos) { + if (id != null) { + RefList l = obj2ref.get(id); + if (l == null) { + l = new RefList(id); + obj2ref.add(l); + } + l.addBlock(blockPos); + } + } + + /** + * Write one reflog entry to the reftable. + * <p> + * Reflog entries must be written in reference name and descending + * {@code updateIndex} (highest first) order. + * + * @param ref + * name of the reference. + * @param updateIndex + * identifier of the transaction that created the log record. The + * {@code updateIndex} must be unique within the scope of + * {@code ref}, and must be within the bounds defined by + * {@code minUpdateIndex <= updateIndex <= maxUpdateIndex}. + * @param who + * committer of the reflog entry. + * @param oldId + * prior id; pass {@link ObjectId#zeroId()} for creations. + * @param newId + * new id; pass {@link ObjectId#zeroId()} for deletions. + * @param message + * optional message (may be null). + * @throws IOException + * if reftable cannot be written. + */ + public void writeLog(String ref, long updateIndex, PersonIdent who, + ObjectId oldId, ObjectId newId, @Nullable String message) + throws IOException { + String msg = message != null ? message : ""; //$NON-NLS-1$ + beginLog(); + logs.write(new LogEntry(ref, updateIndex, who, oldId, newId, msg)); + } + + /** + * Record deletion of one reflog entry in this reftable. + * + * <p> + * The deletion can shadow an entry stored in a lower table in the stack. + * This is useful for {@code refs/stash} and dropping an entry from its + * reflog. + * <p> + * Deletion must be properly interleaved in sorted updateIndex order with + * any other logs written by + * {@link #writeLog(String, long, PersonIdent, ObjectId, ObjectId, String)}. + * + * @param ref + * the ref to delete (hide) a reflog entry from. + * @param updateIndex + * the update index that must be hidden. + * @throws IOException + * if reftable cannot be written. + */ + public void deleteLog(String ref, long updateIndex) throws IOException { + beginLog(); + logs.write(new DeleteLogEntry(ref, updateIndex)); + } + + private void beginLog() throws IOException { + if (logs == null) { + finishRefAndObjSections(); // close prior ref blocks and their index, if present. + out.flushFileHeader(); + out.setBlockSize(logBlockSize); + logs = new Section(LOG_BLOCK_TYPE); + } + } + + /** + * @return an estimate of the current size in bytes of the reftable, if it + * was finished right now. Estimate is only accurate if + * {@link ReftableConfig#setIndexObjects(boolean)} is {@code false} + * and {@link ReftableConfig#setMaxIndexLevels(int)} is {@code 1}. + */ + public long estimateTotalBytes() { + long bytes = out.size(); + if (bytes == 0) { + bytes += FILE_HEADER_LEN; + } + if (cur != null) { + long curBlockPos = out.size(); + int sz = cur.currentSize(); + bytes += sz; + + IndexBuilder idx = null; + if (cur.blockType() == REF_BLOCK_TYPE) { + idx = refs.idx; + } else if (cur.blockType() == LOG_BLOCK_TYPE) { + idx = logs.idx; + } + if (idx != null && shouldHaveIndex(idx)) { + if (idx == refs.idx) { + bytes += out.estimatePadBetweenBlocks(sz); + } + bytes += idx.estimateBytes(curBlockPos); + } + } + bytes += FILE_FOOTER_LEN; + return bytes; + } + + /** + * Finish writing the reftable by writing its trailer. + * + * @return {@code this} + * @throws IOException + * if reftable cannot be written. + */ + public ReftableWriter finish() throws IOException { + finishRefAndObjSections(); + finishLogSection(); + writeFileFooter(); + out.finishFile(); + + stats = new Stats(this, out); + out = null; + obj2ref = null; + cur = null; + refs = null; + objs = null; + logs = null; + return this; + } + + private void finishRefAndObjSections() throws IOException { + if (cur != null && cur.blockType() == REF_BLOCK_TYPE) { + refs.finishSectionMaybeWriteIndex(); + if (indexObjects && !obj2ref.isEmpty() && refs.idx.bytes > 0) { + writeObjBlocks(); + } + obj2ref = null; + } + } + + private void writeObjBlocks() throws IOException { + List<RefList> sorted = sortById(obj2ref); + obj2ref = null; + objIdLen = shortestUniqueAbbreviation(sorted); + + out.padBetweenBlocksToNextBlock(); + objs = new Section(OBJ_BLOCK_TYPE); + objs.entryCnt = sorted.size(); + for (RefList l : sorted) { + objs.write(new ObjEntry(objIdLen, l, l.blockPos)); + } + objs.finishSectionMaybeWriteIndex(); + } + + private void finishLogSection() throws IOException { + if (cur != null && cur.blockType() == LOG_BLOCK_TYPE) { + logs.finishSectionMaybeWriteIndex(); + } + } + + private boolean shouldHaveIndex(IndexBuilder idx) { + int threshold; + if (idx == refs.idx && alignBlocks) { + threshold = 4; + } else { + threshold = 1; + } + return idx.entries.size() + (cur != null ? 1 : 0) > threshold; + } + + private void writeFileHeader() { + byte[] hdr = new byte[FILE_HEADER_LEN]; + encodeHeader(hdr); + out.write(hdr, 0, FILE_HEADER_LEN); + } + + private void encodeHeader(byte[] hdr) { + System.arraycopy(FILE_HEADER_MAGIC, 0, hdr, 0, 4); + int bs = alignBlocks ? refBlockSize : 0; + NB.encodeInt32(hdr, 4, (VERSION_1 << 24) | bs); + NB.encodeInt64(hdr, 8, minUpdateIndex); + NB.encodeInt64(hdr, 16, maxUpdateIndex); + } + + private void writeFileFooter() { + int ftrLen = FILE_FOOTER_LEN; + byte[] ftr = new byte[ftrLen]; + encodeHeader(ftr); + + NB.encodeInt64(ftr, 24, indexPosition(refs)); + NB.encodeInt64(ftr, 32, (firstBlockPosition(objs) << 5) | objIdLen); + NB.encodeInt64(ftr, 40, indexPosition(objs)); + NB.encodeInt64(ftr, 48, firstBlockPosition(logs)); + NB.encodeInt64(ftr, 56, indexPosition(logs)); + + CRC32 crc = new CRC32(); + crc.update(ftr, 0, ftrLen - 4); + NB.encodeInt32(ftr, ftrLen - 4, (int) crc.getValue()); + + out.write(ftr, 0, ftrLen); + } + + private static long firstBlockPosition(@Nullable Section s) { + return s != null ? s.firstBlockPosition : 0; + } + + private static long indexPosition(@Nullable Section s) { + return s != null && s.idx != null ? s.idx.rootPosition : 0; + } + + /** @return statistics of the last written reftable. */ + public Stats getStats() { + return stats; + } + + /** Statistics about a written reftable. */ + public static class Stats { + private final int refBlockSize; + private final int logBlockSize; + private final int restartInterval; + + private final long minUpdateIndex; + private final long maxUpdateIndex; + + private final long refCnt; + private final long objCnt; + private final int objIdLen; + private final long logCnt; + private final long refBytes; + private final long objBytes; + private final long logBytes; + private final long paddingUsed; + private final long totalBytes; + + private final int refIndexSize; + private final int refIndexLevels; + private final int objIndexSize; + private final int objIndexLevels; + + Stats(ReftableWriter w, ReftableOutputStream o) { + refBlockSize = w.refBlockSize; + logBlockSize = w.logBlockSize; + restartInterval = w.restartInterval; + + minUpdateIndex = w.minUpdateIndex; + maxUpdateIndex = w.maxUpdateIndex; + paddingUsed = o.paddingUsed(); + totalBytes = o.size(); + + refCnt = w.refs.entryCnt; + refBytes = w.refs.bytes; + + objCnt = w.objs != null ? w.objs.entryCnt : 0; + objBytes = w.objs != null ? w.objs.bytes : 0; + objIdLen = w.objIdLen; + + logCnt = w.logs != null ? w.logs.entryCnt : 0; + logBytes = w.logs != null ? w.logs.bytes : 0; + + IndexBuilder refIdx = w.refs.idx; + refIndexSize = refIdx.bytes; + refIndexLevels = refIdx.levels; + + IndexBuilder objIdx = w.objs != null ? w.objs.idx : null; + objIndexSize = objIdx != null ? objIdx.bytes : 0; + objIndexLevels = objIdx != null ? objIdx.levels : 0; + } + + /** @return number of bytes in a ref block. */ + public int refBlockSize() { + return refBlockSize; + } + + /** @return number of bytes to compress into a log block. */ + public int logBlockSize() { + return logBlockSize; + } + + /** @return number of references between binary search markers. */ + public int restartInterval() { + return restartInterval; + } + + /** @return smallest update index contained in this reftable. */ + public long minUpdateIndex() { + return minUpdateIndex; + } + + /** @return largest update index contained in this reftable. */ + public long maxUpdateIndex() { + return maxUpdateIndex; + } + + /** @return total number of references in the reftable. */ + public long refCount() { + return refCnt; + } + + /** @return number of unique objects in the reftable. */ + public long objCount() { + return objCnt; + } + + /** @return total number of log records in the reftable. */ + public long logCount() { + return logCnt; + } + + /** @return number of bytes for references, including ref index. */ + public long refBytes() { + return refBytes; + } + + /** @return number of bytes for objects, including object index. */ + public long objBytes() { + return objBytes; + } + + /** @return number of bytes for log, including log index. */ + public long logBytes() { + return logBytes; + } + + /** @return total number of bytes in the reftable. */ + public long totalBytes() { + return totalBytes; + } + + /** @return bytes of padding used to maintain block alignment. */ + public long paddingBytes() { + return paddingUsed; + } + + /** @return number of bytes in the ref index; 0 if no index was used. */ + public int refIndexSize() { + return refIndexSize; + } + + /** @return number of levels in the ref index. */ + public int refIndexLevels() { + return refIndexLevels; + } + + /** @return number of bytes in the object index; 0 if no index. */ + public int objIndexSize() { + return objIndexSize; + } + + /** @return number of levels in the object index. */ + public int objIndexLevels() { + return objIndexLevels; + } + + /** + * @return number of bytes required to uniquely identify all objects in + * the reftable. Unique abbreviations in hex would be + * {@code 2 * objIdLength()}. + */ + public int objIdLength() { + return objIdLen; + } + } + + private static List<RefList> sortById(ObjectIdSubclassMap<RefList> m) { + List<RefList> s = new ArrayList<>(m.size()); + for (RefList l : m) { + s.add(l); + } + Collections.sort(s); + return s; + } + + private static int shortestUniqueAbbreviation(List<RefList> in) { + // Estimate minimum number of bytes necessary for unique abbreviations. + int bytes = Math.max(2, (int) (log(in.size()) / log(8))); + Set<AbbreviatedObjectId> tmp = new HashSet<>((int) (in.size() * 0.75f)); + retry: for (;;) { + int hexLen = bytes * 2; + for (ObjectId id : in) { + AbbreviatedObjectId a = id.abbreviate(hexLen); + if (!tmp.add(a)) { + if (++bytes >= OBJECT_ID_LENGTH) { + return OBJECT_ID_LENGTH; + } + tmp.clear(); + continue retry; + } + } + return bytes; + } + } + + private static class RefList extends ObjectIdOwnerMap.Entry { + final LongList blockPos = new LongList(2); + + RefList(AnyObjectId id) { + super(id); + } + + void addBlock(long pos) { + if (!blockPos.contains(pos)) { + blockPos.add(pos); + } + } + } + + private class Section { + final IndexBuilder idx; + final long firstBlockPosition; + + long entryCnt; + long bytes; + + Section(byte keyType) { + idx = new IndexBuilder(keyType); + firstBlockPosition = out.size(); + } + + long write(BlockWriter.Entry entry) throws IOException { + if (cur == null) { + beginBlock(entry); + } else if (!cur.tryAdd(entry)) { + flushCurBlock(); + if (cur.padBetweenBlocks()) { + out.padBetweenBlocksToNextBlock(); + } + beginBlock(entry); + } + entryCnt++; + return out.size(); + } + + private void beginBlock(BlockWriter.Entry entry) + throws BlockSizeTooSmallException { + byte blockType = entry.blockType(); + int bs = out.bytesAvailableInBlock(); + cur = new BlockWriter(blockType, idx.keyType, bs, restartInterval); + cur.mustAdd(entry); + } + + void flushCurBlock() throws IOException { + idx.entries.add(new IndexEntry(cur.lastKey(), out.size())); + cur.writeTo(out); + } + + void finishSectionMaybeWriteIndex() throws IOException { + flushCurBlock(); + cur = null; + if (shouldHaveIndex(idx)) { + idx.writeIndex(); + } + bytes = out.size() - firstBlockPosition; + } + } + + private class IndexBuilder { + final byte keyType; + List<IndexEntry> entries = new ArrayList<>(); + long rootPosition; + int bytes; + int levels; + + IndexBuilder(byte kt) { + keyType = kt; + } + + int estimateBytes(long curBlockPos) { + BlockWriter b = new BlockWriter( + INDEX_BLOCK_TYPE, keyType, + MAX_BLOCK_SIZE, + Math.max(restartInterval, entries.size() / MAX_RESTARTS)); + try { + for (Entry e : entries) { + b.mustAdd(e); + } + if (cur != null) { + b.mustAdd(new IndexEntry(cur.lastKey(), curBlockPos)); + } + } catch (BlockSizeTooSmallException e) { + return b.currentSize(); + } + return b.currentSize(); + } + + void writeIndex() throws IOException { + if (padBetweenBlocks(keyType)) { + out.padBetweenBlocksToNextBlock(); + } + long startPos = out.size(); + writeMultiLevelIndex(entries); + bytes = (int) (out.size() - startPos); + entries = null; + } + + private void writeMultiLevelIndex(List<IndexEntry> keys) + throws IOException { + levels = 1; + while (maxIndexLevels == 0 || levels < maxIndexLevels) { + keys = writeOneLevel(keys); + if (keys == null) { + return; + } + levels++; + } + + // When maxIndexLevels has restricted the writer, write one + // index block with the entire remaining set of keys. + BlockWriter b = new BlockWriter( + INDEX_BLOCK_TYPE, keyType, + MAX_BLOCK_SIZE, + Math.max(restartInterval, keys.size() / MAX_RESTARTS)); + for (Entry e : keys) { + b.mustAdd(e); + } + rootPosition = out.size(); + b.writeTo(out); + } + + private List<IndexEntry> writeOneLevel(List<IndexEntry> keys) + throws IOException { + Section thisLevel = new Section(keyType); + for (Entry e : keys) { + thisLevel.write(e); + } + if (!thisLevel.idx.entries.isEmpty()) { + thisLevel.flushCurBlock(); + if (cur.padBetweenBlocks()) { + out.padBetweenBlocksToNextBlock(); + } + cur = null; + return thisLevel.idx.entries; + } + + // The current block fit entire level; make it the root. + rootPosition = out.size(); + cur.writeTo(out); + cur = null; + return null; + } + } +} |