Add an implementation for base-85 encoding and decoding [1]. Git binary patches use this format. Base-85 encoding assembles bytes as 32-bit MSB values, then converts these values to base-85 numbers (always 5 bytes) encoded as printable ASCII characters. Decoding base-85 is the reverse operation. Note that decoding may overflow on invalid input as 85^5 > 2^32. Encodings always have a length that is a multiple of 5. If input length is not divisible by 4, padding bytes are (logically) added, which are ignored when decoding. The encoding for n bytes has thus always exactly length (n + 3) / 4 * 5 in integer arithmetic (truncating division). Includes tests. [1] https://datatracker.ietf.org/doc/html/rfc1924 Bug: 371725 Change-Id: Ib5b9a503cd62cf70e080a4fb38c8cd1eeeaebcfe Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch> Signed-off-by: Matthias Sohn <matthias.sohn@sap.com> | tags/v5.12.0.202105261145-m3
/* | |||||
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others | |||||
* | |||||
* This program and the accompanying materials are made available under the | |||||
* terms of the Eclipse Distribution License v. 1.0 which is available at | |||||
* https://www.eclipse.org/org/documents/edl-v10.php. | |||||
* | |||||
* SPDX-License-Identifier: BSD-3-Clause | |||||
*/ | |||||
package org.eclipse.jgit.util; | |||||
import static org.junit.Assert.assertArrayEquals; | |||||
import static org.junit.Assert.assertEquals; | |||||
import static org.junit.Assert.assertNotNull; | |||||
import static org.junit.Assert.assertThrows; | |||||
import static org.junit.Assert.assertTrue; | |||||
import java.nio.charset.StandardCharsets; | |||||
import org.junit.Test; | |||||
/** | |||||
* Tests for {@link Base85}. | |||||
*/ | |||||
public class Base85Test { | |||||
private static final String VALID_CHARS = "0123456789" | |||||
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" | |||||
+ "!#$%&()*+-;<=>?@^_`{|}~"; | |||||
@Test | |||||
public void testChars() { | |||||
for (int i = 0; i < 256; i++) { | |||||
byte[] testData = { '1', '2', '3', '4', (byte) i }; | |||||
if (VALID_CHARS.indexOf(i) >= 0) { | |||||
byte[] decoded = Base85.decode(testData, 4); | |||||
assertNotNull(decoded); | |||||
} else { | |||||
assertThrows(IllegalArgumentException.class, | |||||
() -> Base85.decode(testData, 4)); | |||||
} | |||||
} | |||||
} | |||||
private void roundtrip(byte[] data, int expectedLength) { | |||||
byte[] encoded = Base85.encode(data); | |||||
assertEquals(expectedLength, encoded.length); | |||||
assertArrayEquals(data, Base85.decode(encoded, data.length)); | |||||
} | |||||
private void roundtrip(String data, int expectedLength) { | |||||
roundtrip(data.getBytes(StandardCharsets.US_ASCII), expectedLength); | |||||
} | |||||
@Test | |||||
public void testPadding() { | |||||
roundtrip("", 0); | |||||
roundtrip("a", 5); | |||||
roundtrip("ab", 5); | |||||
roundtrip("abc", 5); | |||||
roundtrip("abcd", 5); | |||||
roundtrip("abcde", 10); | |||||
roundtrip("abcdef", 10); | |||||
roundtrip("abcdefg", 10); | |||||
roundtrip("abcdefgh", 10); | |||||
roundtrip("abcdefghi", 15); | |||||
} | |||||
@Test | |||||
public void testBinary() { | |||||
roundtrip(new byte[] { 1 }, 5); | |||||
roundtrip(new byte[] { 1, 2 }, 5); | |||||
roundtrip(new byte[] { 1, 2, 3 }, 5); | |||||
roundtrip(new byte[] { 1, 2, 3, 4 }, 5); | |||||
roundtrip(new byte[] { 1, 2, 3, 4, 5 }, 10); | |||||
roundtrip(new byte[] { 1, 2, 3, 4, 5, 0, 0, 0 }, 10); | |||||
roundtrip(new byte[] { 1, 2, 3, 4, 0, 0, 0, 5 }, 10); | |||||
} | |||||
@Test | |||||
public void testOverflow() { | |||||
IllegalArgumentException e = assertThrows( | |||||
IllegalArgumentException.class, | |||||
() -> Base85.decode(new byte[] { '~', '~', '~', '~', '~' }, 4)); | |||||
assertTrue(e.getMessage().contains("overflow")); | |||||
} | |||||
} |
badSectionEntry=Bad section entry: {0} | badSectionEntry=Bad section entry: {0} | ||||
badShallowLine=Bad shallow line: {0} | badShallowLine=Bad shallow line: {0} | ||||
bareRepositoryNoWorkdirAndIndex=Bare Repository has neither a working tree, nor an index | bareRepositoryNoWorkdirAndIndex=Bare Repository has neither a working tree, nor an index | ||||
base85invalidChar=Invalid base-85 character: 0x{0} | |||||
base85length=Base-85 encoded data must have a length that is a multiple of 5 | |||||
base85overflow=Base-85 value overflow, does not fit into 32 bits: 0x{0} | |||||
base85tooLong=Extra base-85 encoded data for output size of {0} bytes | |||||
base85tooShort=Base-85 data decoded into less than {0} bytes | |||||
baseLengthIncorrect=base length incorrect | baseLengthIncorrect=base length incorrect | ||||
bitmapMissingObject=Bitmap at {0} is missing {1}. | bitmapMissingObject=Bitmap at {0} is missing {1}. | ||||
bitmapsMustBePrepared=Bitmaps must be prepared before they may be written. | bitmapsMustBePrepared=Bitmaps must be prepared before they may be written. |
/***/ public String badSectionEntry; | /***/ public String badSectionEntry; | ||||
/***/ public String badShallowLine; | /***/ public String badShallowLine; | ||||
/***/ public String bareRepositoryNoWorkdirAndIndex; | /***/ public String bareRepositoryNoWorkdirAndIndex; | ||||
/***/ public String base85invalidChar; | |||||
/***/ public String base85length; | |||||
/***/ public String base85overflow; | |||||
/***/ public String base85tooLong; | |||||
/***/ public String base85tooShort; | |||||
/***/ public String baseLengthIncorrect; | /***/ public String baseLengthIncorrect; | ||||
/***/ public String bitmapMissingObject; | /***/ public String bitmapMissingObject; | ||||
/***/ public String bitmapsMustBePrepared; | /***/ public String bitmapsMustBePrepared; |
/* | |||||
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others | |||||
* | |||||
* This program and the accompanying materials are made available under the | |||||
* terms of the Eclipse Distribution License v. 1.0 which is available at | |||||
* https://www.eclipse.org/org/documents/edl-v10.php. | |||||
* | |||||
* SPDX-License-Identifier: BSD-3-Clause | |||||
*/ | |||||
package org.eclipse.jgit.util; | |||||
import java.nio.charset.StandardCharsets; | |||||
import java.text.MessageFormat; | |||||
import java.util.Arrays; | |||||
import org.eclipse.jgit.internal.JGitText; | |||||
/** | |||||
* Base-85 encoder/decoder. | |||||
* | |||||
* @since 5.12 | |||||
*/ | |||||
public final class Base85 { | |||||
private static final byte[] ENCODE = ("0123456789" //$NON-NLS-1$ | |||||
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" //$NON-NLS-1$ | |||||
+ "abcdefghijklmnopqrstuvwxyz" //$NON-NLS-1$ | |||||
+ "!#$%&()*+-;<=>?@^_`{|}~") //$NON-NLS-1$ | |||||
.getBytes(StandardCharsets.US_ASCII); | |||||
private static final int[] DECODE = new int[256]; | |||||
static { | |||||
Arrays.fill(DECODE, -1); | |||||
for (int i = 0; i < ENCODE.length; i++) { | |||||
DECODE[ENCODE[i]] = i; | |||||
} | |||||
} | |||||
private Base85() { | |||||
// No instantiation | |||||
} | |||||
/** | |||||
* Determines the length of the base-85 encoding for {@code rawLength} | |||||
* bytes. | |||||
* | |||||
* @param rawLength | |||||
* number of bytes to encode | |||||
* @return number of bytes needed for the base-85 encoding of | |||||
* {@code rawLength} bytes | |||||
*/ | |||||
public static int encodedLength(int rawLength) { | |||||
return (rawLength + 3) / 4 * 5; | |||||
} | |||||
/** | |||||
* Encodes the given {@code data} in Base-85. | |||||
* | |||||
* @param data | |||||
* to encode | |||||
* @return encoded data | |||||
*/ | |||||
public static byte[] encode(byte[] data) { | |||||
return encode(data, 0, data.length); | |||||
} | |||||
/** | |||||
* Encodes {@code length} bytes of {@code data} in Base-85, beginning at the | |||||
* {@code start} index. | |||||
* | |||||
* @param data | |||||
* to encode | |||||
* @param start | |||||
* index of the first byte to encode | |||||
* @param length | |||||
* number of bytes to encode | |||||
* @return encoded data | |||||
*/ | |||||
public static byte[] encode(byte[] data, int start, int length) { | |||||
byte[] result = new byte[encodedLength(length)]; | |||||
int end = start + length; | |||||
int in = start; | |||||
int out = 0; | |||||
while (in < end) { | |||||
// Accumulate remaining bytes MSB first as a 32bit value | |||||
long accumulator = ((long) (data[in++] & 0xFF)) << 24; | |||||
if (in < end) { | |||||
accumulator |= (data[in++] & 0xFF) << 16; | |||||
if (in < end) { | |||||
accumulator |= (data[in++] & 0xFF) << 8; | |||||
if (in < end) { | |||||
accumulator |= (data[in++] & 0xFF); | |||||
} | |||||
} | |||||
} | |||||
// Write the 32bit value in base-85 encoding, also MSB first | |||||
for (int i = 4; i >= 0; i--) { | |||||
result[out + i] = ENCODE[(int) (accumulator % 85)]; | |||||
accumulator /= 85; | |||||
} | |||||
out += 5; | |||||
} | |||||
return result; | |||||
} | |||||
/** | |||||
* Decodes the Base-85 {@code encoded} data into a byte array of | |||||
* {@code expectedSize} bytes. | |||||
* | |||||
* @param encoded | |||||
* Base-85 encoded data | |||||
* @param expectedSize | |||||
* of the result | |||||
* @return the decoded bytes | |||||
* @throws IllegalArgumentException | |||||
* if expectedSize doesn't match, the encoded data has a length | |||||
* that is not a multiple of 5, or there are invalid characters | |||||
* in the encoded data | |||||
*/ | |||||
public static byte[] decode(byte[] encoded, int expectedSize) { | |||||
return decode(encoded, 0, encoded.length, expectedSize); | |||||
} | |||||
/** | |||||
* Decodes {@code length} bytes of Base-85 {@code encoded} data, beginning | |||||
* at the {@code start} index, into a byte array of {@code expectedSize} | |||||
* bytes. | |||||
* | |||||
* @param encoded | |||||
* Base-85 encoded data | |||||
* @param start | |||||
* index at which the data to decode starts in {@code encoded} | |||||
* @param length | |||||
* of the Base-85 encoded data | |||||
* @param expectedSize | |||||
* of the result | |||||
* @return the decoded bytes | |||||
* @throws IllegalArgumentException | |||||
* if expectedSize doesn't match, {@code length} is not a | |||||
* multiple of 5, or there are invalid characters in the encoded | |||||
* data | |||||
*/ | |||||
public static byte[] decode(byte[] encoded, int start, int length, | |||||
int expectedSize) { | |||||
if (length % 5 != 0) { | |||||
throw new IllegalArgumentException(JGitText.get().base85length); | |||||
} | |||||
byte[] result = new byte[expectedSize]; | |||||
int end = start + length; | |||||
int in = start; | |||||
int out = 0; | |||||
while (in < end && out < expectedSize) { | |||||
// Accumulate 5 bytes, "MSB" first | |||||
long accumulator = 0; | |||||
for (int i = 4; i >= 0; i--) { | |||||
int val = DECODE[encoded[in++] & 0xFF]; | |||||
if (val < 0) { | |||||
throw new IllegalArgumentException(MessageFormat.format( | |||||
JGitText.get().base85invalidChar, | |||||
Integer.toHexString(encoded[in - 1] & 0xFF))); | |||||
} | |||||
accumulator = accumulator * 85 + val; | |||||
} | |||||
if (accumulator > 0xFFFF_FFFFL) { | |||||
throw new IllegalArgumentException( | |||||
MessageFormat.format(JGitText.get().base85overflow, | |||||
Long.toHexString(accumulator))); | |||||
} | |||||
// Write remaining bytes, MSB first | |||||
result[out++] = (byte) (accumulator >>> 24); | |||||
if (out < expectedSize) { | |||||
result[out++] = (byte) (accumulator >>> 16); | |||||
if (out < expectedSize) { | |||||
result[out++] = (byte) (accumulator >>> 8); | |||||
if (out < expectedSize) { | |||||
result[out++] = (byte) accumulator; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
// Should have exhausted 'in' and filled 'out' completely | |||||
if (in < end) { | |||||
throw new IllegalArgumentException( | |||||
MessageFormat.format(JGitText.get().base85tooLong, | |||||
Integer.valueOf(expectedSize))); | |||||
} | |||||
if (out < expectedSize) { | |||||
throw new IllegalArgumentException( | |||||
MessageFormat.format(JGitText.get().base85tooShort, | |||||
Integer.valueOf(expectedSize))); | |||||
} | |||||
return result; | |||||
} | |||||
} |