From 501fc0dadde1b68a6c7bccd870e18cdf03d0e62c Mon Sep 17 00:00:00 2001 From: Thomas Wolf Date: Fri, 5 Mar 2021 23:55:18 +0100 Subject: ApplyCommand: add a base-85 codec Add an implementation for base-85 encoding and decoding [1]. Git binary patches use this format. Base-85 encoding assembles bytes as 32-bit MSB values, then converts these values to base-85 numbers (always 5 bytes) encoded as printable ASCII characters. Decoding base-85 is the reverse operation. Note that decoding may overflow on invalid input as 85^5 > 2^32. Encodings always have a length that is a multiple of 5. If input length is not divisible by 4, padding bytes are (logically) added, which are ignored when decoding. The encoding for n bytes has thus always exactly length (n + 3) / 4 * 5 in integer arithmetic (truncating division). Includes tests. [1] https://datatracker.ietf.org/doc/html/rfc1924 Bug: 371725 Change-Id: Ib5b9a503cd62cf70e080a4fb38c8cd1eeeaebcfe Signed-off-by: Thomas Wolf Signed-off-by: Matthias Sohn --- .../org/eclipse/jgit/internal/JGitText.properties | 5 + .../src/org/eclipse/jgit/internal/JGitText.java | 5 + .../src/org/eclipse/jgit/util/Base85.java | 195 +++++++++++++++++++++ 3 files changed, 205 insertions(+) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/util/Base85.java (limited to 'org.eclipse.jgit') diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties index 2fa8713daa..6c4ca52ccf 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties @@ -37,6 +37,11 @@ badRef=Bad ref: {0}: {1} badSectionEntry=Bad section entry: {0} badShallowLine=Bad shallow line: {0} bareRepositoryNoWorkdirAndIndex=Bare Repository has neither a working tree, nor an index +base85invalidChar=Invalid base-85 character: 0x{0} +base85length=Base-85 encoded data must have a length that is a multiple of 5 +base85overflow=Base-85 value overflow, does not fit into 32 bits: 0x{0} +base85tooLong=Extra base-85 encoded data for output size of {0} bytes +base85tooShort=Base-85 data decoded into less than {0} bytes baseLengthIncorrect=base length incorrect bitmapMissingObject=Bitmap at {0} is missing {1}. bitmapsMustBePrepared=Bitmaps must be prepared before they may be written. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java index ab9fc5c9bb..5c194f3413 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java @@ -65,6 +65,11 @@ public class JGitText extends TranslationBundle { /***/ public String badSectionEntry; /***/ public String badShallowLine; /***/ public String bareRepositoryNoWorkdirAndIndex; + /***/ public String base85invalidChar; + /***/ public String base85length; + /***/ public String base85overflow; + /***/ public String base85tooLong; + /***/ public String base85tooShort; /***/ public String baseLengthIncorrect; /***/ public String bitmapMissingObject; /***/ public String bitmapsMustBePrepared; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/Base85.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/Base85.java new file mode 100644 index 0000000000..54b7cfcaa7 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/Base85.java @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2021 Thomas Wolf and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.util; + +import java.nio.charset.StandardCharsets; +import java.text.MessageFormat; +import java.util.Arrays; + +import org.eclipse.jgit.internal.JGitText; + +/** + * Base-85 encoder/decoder. + * + * @since 5.12 + */ +public final class Base85 { + + private static final byte[] ENCODE = ("0123456789" //$NON-NLS-1$ + + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" //$NON-NLS-1$ + + "abcdefghijklmnopqrstuvwxyz" //$NON-NLS-1$ + + "!#$%&()*+-;<=>?@^_`{|}~") //$NON-NLS-1$ + .getBytes(StandardCharsets.US_ASCII); + + private static final int[] DECODE = new int[256]; + + static { + Arrays.fill(DECODE, -1); + for (int i = 0; i < ENCODE.length; i++) { + DECODE[ENCODE[i]] = i; + } + } + + private Base85() { + // No instantiation + } + + /** + * Determines the length of the base-85 encoding for {@code rawLength} + * bytes. + * + * @param rawLength + * number of bytes to encode + * @return number of bytes needed for the base-85 encoding of + * {@code rawLength} bytes + */ + public static int encodedLength(int rawLength) { + return (rawLength + 3) / 4 * 5; + } + + /** + * Encodes the given {@code data} in Base-85. + * + * @param data + * to encode + * @return encoded data + */ + public static byte[] encode(byte[] data) { + return encode(data, 0, data.length); + } + + /** + * Encodes {@code length} bytes of {@code data} in Base-85, beginning at the + * {@code start} index. + * + * @param data + * to encode + * @param start + * index of the first byte to encode + * @param length + * number of bytes to encode + * @return encoded data + */ + public static byte[] encode(byte[] data, int start, int length) { + byte[] result = new byte[encodedLength(length)]; + int end = start + length; + int in = start; + int out = 0; + while (in < end) { + // Accumulate remaining bytes MSB first as a 32bit value + long accumulator = ((long) (data[in++] & 0xFF)) << 24; + if (in < end) { + accumulator |= (data[in++] & 0xFF) << 16; + if (in < end) { + accumulator |= (data[in++] & 0xFF) << 8; + if (in < end) { + accumulator |= (data[in++] & 0xFF); + } + } + } + // Write the 32bit value in base-85 encoding, also MSB first + for (int i = 4; i >= 0; i--) { + result[out + i] = ENCODE[(int) (accumulator % 85)]; + accumulator /= 85; + } + out += 5; + } + return result; + } + + /** + * Decodes the Base-85 {@code encoded} data into a byte array of + * {@code expectedSize} bytes. + * + * @param encoded + * Base-85 encoded data + * @param expectedSize + * of the result + * @return the decoded bytes + * @throws IllegalArgumentException + * if expectedSize doesn't match, the encoded data has a length + * that is not a multiple of 5, or there are invalid characters + * in the encoded data + */ + public static byte[] decode(byte[] encoded, int expectedSize) { + return decode(encoded, 0, encoded.length, expectedSize); + } + + /** + * Decodes {@code length} bytes of Base-85 {@code encoded} data, beginning + * at the {@code start} index, into a byte array of {@code expectedSize} + * bytes. + * + * @param encoded + * Base-85 encoded data + * @param start + * index at which the data to decode starts in {@code encoded} + * @param length + * of the Base-85 encoded data + * @param expectedSize + * of the result + * @return the decoded bytes + * @throws IllegalArgumentException + * if expectedSize doesn't match, {@code length} is not a + * multiple of 5, or there are invalid characters in the encoded + * data + */ + public static byte[] decode(byte[] encoded, int start, int length, + int expectedSize) { + if (length % 5 != 0) { + throw new IllegalArgumentException(JGitText.get().base85length); + } + byte[] result = new byte[expectedSize]; + int end = start + length; + int in = start; + int out = 0; + while (in < end && out < expectedSize) { + // Accumulate 5 bytes, "MSB" first + long accumulator = 0; + for (int i = 4; i >= 0; i--) { + int val = DECODE[encoded[in++] & 0xFF]; + if (val < 0) { + throw new IllegalArgumentException(MessageFormat.format( + JGitText.get().base85invalidChar, + Integer.toHexString(encoded[in - 1] & 0xFF))); + } + accumulator = accumulator * 85 + val; + } + if (accumulator > 0xFFFF_FFFFL) { + throw new IllegalArgumentException( + MessageFormat.format(JGitText.get().base85overflow, + Long.toHexString(accumulator))); + } + // Write remaining bytes, MSB first + result[out++] = (byte) (accumulator >>> 24); + if (out < expectedSize) { + result[out++] = (byte) (accumulator >>> 16); + if (out < expectedSize) { + result[out++] = (byte) (accumulator >>> 8); + if (out < expectedSize) { + result[out++] = (byte) accumulator; + } + } + } + } + // Should have exhausted 'in' and filled 'out' completely + if (in < end) { + throw new IllegalArgumentException( + MessageFormat.format(JGitText.get().base85tooLong, + Integer.valueOf(expectedSize))); + } + if (out < expectedSize) { + throw new IllegalArgumentException( + MessageFormat.format(JGitText.get().base85tooShort, + Integer.valueOf(expectedSize))); + } + return result; + } +} -- cgit v1.2.3