Browse Source

ApplyCommand: add a base-85 codec

Add an implementation for base-85 encoding and decoding [1]. Git binary
patches use this format.

Base-85 encoding assembles bytes as 32-bit MSB values, then converts
these values to base-85 numbers (always 5 bytes) encoded as printable
ASCII characters. Decoding base-85 is the reverse operation. Note
that decoding may overflow on invalid input as 85^5 > 2^32. Encodings
always have a length that is a multiple of 5. If input length is not
divisible by 4, padding bytes are (logically) added, which are ignored
when decoding. The encoding for n bytes has thus always exactly length
(n + 3) / 4 * 5 in integer arithmetic (truncating division).

Includes tests.

[1] https://datatracker.ietf.org/doc/html/rfc1924

Bug: 371725
Change-Id: Ib5b9a503cd62cf70e080a4fb38c8cd1eeeaebcfe
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
tags/v5.12.0.202105261145-m3
Thomas Wolf 3 years ago
parent
commit
501fc0dadd

+ 87
- 0
org.eclipse.jgit.test/tst/org/eclipse/jgit/util/Base85Test.java View File

@@ -0,0 +1,87 @@
/*
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.util;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;

import java.nio.charset.StandardCharsets;

import org.junit.Test;

/**
 * Unit tests for the {@link Base85} codec: alphabet validation, padding
 * behavior for inputs whose length is not a multiple of four, binary
 * round-trips, and overflow detection.
 */
public class Base85Test {

	/** All 85 characters that are legal in base-85 encoded data. */
	private static final String VALID_CHARS = "0123456789"
			+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
			+ "!#$%&()*+-;<=>?@^_`{|}~";

	/**
	 * Every byte value that is part of the alphabet must decode; every other
	 * byte value must be rejected.
	 */
	@Test
	public void testChars() {
		for (int c = 0; c < 256; c++) {
			// Four harmless digits followed by the byte value under test
			byte[] input = { '1', '2', '3', '4', (byte) c };
			boolean inAlphabet = VALID_CHARS.indexOf(c) != -1;
			if (!inAlphabet) {
				assertThrows(IllegalArgumentException.class,
						() -> Base85.decode(input, 4));
				continue;
			}
			assertNotNull(Base85.decode(input, 4));
		}
	}

	/**
	 * Encodes {@code raw}, checks the size of the encoding, and verifies that
	 * decoding yields the original bytes again.
	 */
	private void checkRoundTrip(byte[] raw, int encodedSize) {
		byte[] base85 = Base85.encode(raw);
		assertEquals(encodedSize, base85.length);
		byte[] restored = Base85.decode(base85, raw.length);
		assertArrayEquals(raw, restored);
	}

	/** Same as above for the US-ASCII bytes of {@code text}. */
	private void checkRoundTrip(String text, int encodedSize) {
		byte[] raw = text.getBytes(StandardCharsets.US_ASCII);
		checkRoundTrip(raw, encodedSize);
	}

	/**
	 * Inputs of 0 to 9 bytes: the encoded size grows by 5 for every started
	 * group of 4 input bytes.
	 */
	@Test
	public void testPadding() {
		String text = "abcdefghi";
		// encodedSizes[n] is the expected size for the first n characters
		int[] encodedSizes = { 0, 5, 5, 5, 5, 10, 10, 10, 10, 15 };
		for (int n = 0; n < encodedSizes.length; n++) {
			checkRoundTrip(text.substring(0, n), encodedSizes[n]);
		}
	}

	/** Round-trips of non-text data, including embedded zero bytes. */
	@Test
	public void testBinary() {
		byte[][] inputs = {
				{ 1 },
				{ 1, 2 },
				{ 1, 2, 3 },
				{ 1, 2, 3, 4 },
				{ 1, 2, 3, 4, 5 },
				{ 1, 2, 3, 4, 5, 0, 0, 0 },
				{ 1, 2, 3, 4, 0, 0, 0, 5 } };
		int[] encodedSizes = { 5, 5, 5, 5, 10, 10, 10 };
		for (int k = 0; k < inputs.length; k++) {
			checkRoundTrip(inputs[k], encodedSizes[k]);
		}
	}

	/**
	 * Five times the highest digit is a value above 2^32 - 1 and must be
	 * reported as an overflow.
	 */
	@Test
	public void testOverflow() {
		byte[] maxDigits = { '~', '~', '~', '~', '~' };
		IllegalArgumentException thrown = assertThrows(
				IllegalArgumentException.class,
				() -> Base85.decode(maxDigits, 4));
		String message = thrown.getMessage();
		assertTrue(message.contains("overflow"));
	}
}

+ 5
- 0
org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties View File

@@ -37,6 +37,11 @@ badRef=Bad ref: {0}: {1}
badSectionEntry=Bad section entry: {0}
badShallowLine=Bad shallow line: {0}
bareRepositoryNoWorkdirAndIndex=Bare Repository has neither a working tree, nor an index
base85invalidChar=Invalid base-85 character: 0x{0}
base85length=Base-85 encoded data must have a length that is a multiple of 5
base85overflow=Base-85 value overflow, does not fit into 32 bits: 0x{0}
base85tooLong=Extra base-85 encoded data for output size of {0} bytes
base85tooShort=Base-85 data decoded into less than {0} bytes
baseLengthIncorrect=base length incorrect
bitmapMissingObject=Bitmap at {0} is missing {1}.
bitmapsMustBePrepared=Bitmaps must be prepared before they may be written.

+ 5
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java View File

@@ -65,6 +65,11 @@ public class JGitText extends TranslationBundle {
/***/ public String badSectionEntry;
/***/ public String badShallowLine;
/***/ public String bareRepositoryNoWorkdirAndIndex;
/***/ public String base85invalidChar;
/***/ public String base85length;
/***/ public String base85overflow;
/***/ public String base85tooLong;
/***/ public String base85tooShort;
/***/ public String baseLengthIncorrect;
/***/ public String bitmapMissingObject;
/***/ public String bitmapsMustBePrepared;

+ 195
- 0
org.eclipse.jgit/src/org/eclipse/jgit/util/Base85.java View File

@@ -0,0 +1,195 @@
/*
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.util;

import java.nio.charset.StandardCharsets;
import java.text.MessageFormat;
import java.util.Arrays;

import org.eclipse.jgit.internal.JGitText;

/**
 * Base-85 encoder/decoder.
 * <p>
 * Input bytes are grouped four at a time into a 32-bit value, most
 * significant byte first, and each such value is written as five base-85
 * digits, most significant digit first. A trailing group of fewer than four
 * bytes is treated as if it were padded with zero bytes; the decoder is told
 * the expected raw size and drops the padding again.
 * </p>
 *
 * @since 5.12
 */
public final class Base85 {

	/** The alphabet: {@code ENCODE[d]} is the ASCII character for digit d. */
	private static final byte[] ENCODE = ("0123456789" //$NON-NLS-1$
			+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" //$NON-NLS-1$
			+ "abcdefghijklmnopqrstuvwxyz" //$NON-NLS-1$
			+ "!#$%&()*+-;<=>?@^_`{|}~") //$NON-NLS-1$
					.getBytes(StandardCharsets.US_ASCII);

	/**
	 * Reverse lookup, indexed by unsigned byte value: the digit value of that
	 * character, or -1 if the character is not part of the alphabet.
	 */
	private static final int[] DECODE = new int[256];

	static {
		Arrays.fill(DECODE, -1);
		for (int i = 0; i < ENCODE.length; i++) {
			// All alphabet characters are ASCII, hence non-negative as bytes
			DECODE[ENCODE[i]] = i;
		}
	}

	private Base85() {
		// No instantiation
	}

	/**
	 * Determines the length of the base-85 encoding for {@code rawLength}
	 * bytes: five encoded bytes for every started group of four raw bytes.
	 *
	 * @param rawLength
	 *            number of bytes to encode
	 * @return number of bytes needed for the base-85 encoding of
	 *         {@code rawLength} bytes
	 * @throws IllegalArgumentException
	 *             if the encoded length would exceed
	 *             {@link Integer#MAX_VALUE}
	 */
	public static int encodedLength(int rawLength) {
		// Compute in long: both "+ 3" and "* 5" can overflow an int for large
		// rawLength, which would silently yield a wrong (even negative) size.
		long encoded = (rawLength + 3L) / 4 * 5;
		if (encoded > Integer.MAX_VALUE) {
			throw new IllegalArgumentException(
					"Base-85 encoding of " + rawLength //$NON-NLS-1$
							+ " bytes exceeds the maximum array size"); //$NON-NLS-1$
		}
		return (int) encoded;
	}

	/**
	 * Encodes the given {@code data} in Base-85.
	 *
	 * @param data
	 *            to encode
	 * @return encoded data; its length is {@code encodedLength(data.length)}
	 * @throws IllegalArgumentException
	 *             if the encoding would not fit into a byte array
	 */
	public static byte[] encode(byte[] data) {
		return encode(data, 0, data.length);
	}

	/**
	 * Encodes {@code length} bytes of {@code data} in Base-85, beginning at the
	 * {@code start} index.
	 *
	 * @param data
	 *            to encode
	 * @param start
	 *            index of the first byte to encode
	 * @param length
	 *            number of bytes to encode
	 * @return encoded data; its length is {@code encodedLength(length)}
	 * @throws IllegalArgumentException
	 *             if the encoding would not fit into a byte array
	 */
	public static byte[] encode(byte[] data, int start, int length) {
		byte[] result = new byte[encodedLength(length)];
		int end = start + length;
		int in = start;
		int out = 0;
		while (in < end) {
			// Accumulate remaining bytes MSB first as a 32bit value. A long is
			// used so that the value is never negative; bytes missing in the
			// last group stay zero.
			long accumulator = ((long) (data[in++] & 0xFF)) << 24;
			if (in < end) {
				accumulator |= (data[in++] & 0xFF) << 16;
				if (in < end) {
					accumulator |= (data[in++] & 0xFF) << 8;
					if (in < end) {
						accumulator |= (data[in++] & 0xFF);
					}
				}
			}
			// Write the 32bit value in base-85 encoding, also MSB first:
			// the least significant digit goes into the last of the 5 slots.
			for (int i = 4; i >= 0; i--) {
				result[out + i] = ENCODE[(int) (accumulator % 85)];
				accumulator /= 85;
			}
			out += 5;
		}
		return result;
	}

	/**
	 * Decodes the Base-85 {@code encoded} data into a byte array of
	 * {@code expectedSize} bytes.
	 *
	 * @param encoded
	 *            Base-85 encoded data
	 * @param expectedSize
	 *            of the result
	 * @return the decoded bytes
	 * @throws IllegalArgumentException
	 *             if expectedSize is negative or doesn't match, the encoded
	 *             data has a length that is not a multiple of 5, there are
	 *             invalid characters in the encoded data, or a group of 5
	 *             characters encodes a value that doesn't fit into 32 bits
	 */
	public static byte[] decode(byte[] encoded, int expectedSize) {
		return decode(encoded, 0, encoded.length, expectedSize);
	}

	/**
	 * Decodes {@code length} bytes of Base-85 {@code encoded} data, beginning
	 * at the {@code start} index, into a byte array of {@code expectedSize}
	 * bytes.
	 *
	 * @param encoded
	 *            Base-85 encoded data
	 * @param start
	 *            index at which the data to decode starts in {@code encoded}
	 * @param length
	 *            of the Base-85 encoded data
	 * @param expectedSize
	 *            of the result
	 * @return the decoded bytes
	 * @throws IllegalArgumentException
	 *             if expectedSize is negative or doesn't match, {@code length}
	 *             is not a multiple of 5, there are invalid characters in the
	 *             encoded data, or a group of 5 characters encodes a value
	 *             that doesn't fit into 32 bits
	 */
	public static byte[] decode(byte[] encoded, int start, int length,
			int expectedSize) {
		if (length % 5 != 0) {
			throw new IllegalArgumentException(JGitText.get().base85length);
		}
		if (expectedSize < 0) {
			// Report this as the documented exception type instead of letting
			// the array allocation fail with NegativeArraySizeException.
			throw new IllegalArgumentException(
					"Negative expected size: " + expectedSize); //$NON-NLS-1$
		}
		byte[] result = new byte[expectedSize];
		int end = start + length;
		int in = start;
		int out = 0;
		while (in < end && out < expectedSize) {
			// Accumulate 5 bytes, "MSB" first
			long accumulator = 0;
			for (int i = 4; i >= 0; i--) {
				int val = DECODE[encoded[in++] & 0xFF];
				if (val < 0) {
					throw new IllegalArgumentException(MessageFormat.format(
							JGitText.get().base85invalidChar,
							Integer.toHexString(encoded[in - 1] & 0xFF)));
				}
				accumulator = accumulator * 85 + val;
			}
			// 85^5 > 2^32: five valid digits may still encode too large a
			// value
			if (accumulator > 0xFFFF_FFFFL) {
				throw new IllegalArgumentException(
						MessageFormat.format(JGitText.get().base85overflow,
								Long.toHexString(accumulator)));
			}
			// Write remaining bytes, MSB first; stop early in the last group
			// so that padding bytes are dropped
			result[out++] = (byte) (accumulator >>> 24);
			if (out < expectedSize) {
				result[out++] = (byte) (accumulator >>> 16);
				if (out < expectedSize) {
					result[out++] = (byte) (accumulator >>> 8);
					if (out < expectedSize) {
						result[out++] = (byte) accumulator;
					}
				}
			}
		}
		// Should have exhausted 'in' and filled 'out' completely
		if (in < end) {
			throw new IllegalArgumentException(
					MessageFormat.format(JGitText.get().base85tooLong,
							Integer.valueOf(expectedSize)));
		}
		if (out < expectedSize) {
			throw new IllegalArgumentException(
					MessageFormat.format(JGitText.get().base85tooShort,
							Integer.valueOf(expectedSize)));
		}
		return result;
	}
}

Loading…
Cancel
Save