aboutsummaryrefslogtreecommitdiffstats
path: root/org.eclipse.jgit/src/org/eclipse/jgit/util
diff options
context:
space:
mode:
author Thomas Wolf <thomas.wolf@paranor.ch> 2021-03-05 23:55:18 +0100
committer Matthias Sohn <matthias.sohn@sap.com> 2021-05-26 00:37:45 +0200
commit 501fc0dadde1b68a6c7bccd870e18cdf03d0e62c (patch)
tree a1eb67a970baf5889a670b14e6c689dcc7a43253 /org.eclipse.jgit/src/org/eclipse/jgit/util
parent d2846cc8b2a831a089ee768a0475e64ec5b85519 (diff)
download jgit-501fc0dadde1b68a6c7bccd870e18cdf03d0e62c.tar.gz
jgit-501fc0dadde1b68a6c7bccd870e18cdf03d0e62c.zip
ApplyCommand: add a base-85 codec
Add an implementation for base-85 encoding and decoding [1]. Git binary
patches use this format.

Base-85 encoding assembles bytes as 32-bit MSB values, then converts
these values to base-85 numbers (always 5 bytes) encoded as printable
ASCII characters. Decoding base-85 is the reverse operation. Note that
decoding may overflow on invalid input as 85^5 > 2^32.

Encodings always have a length that is a multiple of 5. If input length
is not divisible by 4, padding bytes are (logically) added, which are
ignored when decoding. The encoding for n bytes has thus always exactly
length (n + 3) / 4 * 5 in integer arithmetic (truncating division).

Includes tests.

[1] https://datatracker.ietf.org/doc/html/rfc1924

Bug: 371725
Change-Id: Ib5b9a503cd62cf70e080a4fb38c8cd1eeeaebcfe
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/util')
-rw-r--r-- org.eclipse.jgit/src/org/eclipse/jgit/util/Base85.java 195
1 files changed, 195 insertions, 0 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/Base85.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/Base85.java
new file mode 100644
index 0000000000..54b7cfcaa7
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/Base85.java
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.util;
+
+import java.nio.charset.StandardCharsets;
+import java.text.MessageFormat;
+import java.util.Arrays;
+
+import org.eclipse.jgit.internal.JGitText;
+
+/**
+ * Encoder and decoder for the base-85 representation used by git binary
+ * patches.
+ * <p>
+ * Input bytes are grouped MSB first into 32-bit values; each value is written
+ * as five base-85 digits taken from a fixed alphabet of ASCII characters.
+ * </p>
+ *
+ * @since 5.12
+ */
+public final class Base85 {
+
+    /** The 85 digit characters, indexed by digit value. */
+    private static final byte[] ENCODE = ("0123456789" //$NON-NLS-1$
+            + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" //$NON-NLS-1$
+            + "abcdefghijklmnopqrstuvwxyz" //$NON-NLS-1$
+            + "!#$%&()*+-;<=>?@^_`{|}~") //$NON-NLS-1$
+            .getBytes(StandardCharsets.US_ASCII);
+
+    /** Maps an unsigned byte value to its digit value; -1 if not a digit. */
+    private static final int[] DECODE = createDecodeTable();
+
+    private Base85() {
+        // Static utility class; not meant to be instantiated
+    }
+
+    /**
+     * Builds the reverse lookup table for {@link #ENCODE}.
+     *
+     * @return a table with 256 entries, holding -1 for every byte value that
+     *         is not part of the alphabet
+     */
+    private static int[] createDecodeTable() {
+        int[] table = new int[256];
+        Arrays.fill(table, -1);
+        int digit = 0;
+        for (byte symbol : ENCODE) {
+            table[symbol] = digit++;
+        }
+        return table;
+    }
+
+    /**
+     * Computes how many bytes the base-85 encoding of {@code rawLength} bytes
+     * occupies.
+     *
+     * @param rawLength
+     *            number of bytes to encode
+     * @return size in bytes of the base-85 encoding of {@code rawLength}
+     *         bytes
+     */
+    public static int encodedLength(int rawLength) {
+        // Every started group of 4 input bytes yields 5 output bytes
+        int groups = (rawLength + 3) / 4;
+        return groups * 5;
+    }
+
+    /**
+     * Encodes all bytes of {@code data} in Base-85.
+     *
+     * @param data
+     *            to encode
+     * @return encoded data
+     */
+    public static byte[] encode(byte[] data) {
+        return encode(data, 0, data.length);
+    }
+
+    /**
+     * Encodes a slice of {@code data} in Base-85. The slice consists of
+     * {@code length} bytes, the first one being at index {@code start}.
+     *
+     * @param data
+     *            to encode
+     * @param start
+     *            index of the first byte to encode
+     * @param length
+     *            number of bytes to encode
+     * @return encoded data
+     */
+    public static byte[] encode(byte[] data, int start, int length) {
+        byte[] encoded = new byte[encodedLength(length)];
+        int limit = start + length;
+        int pos = start;
+        int written = 0;
+        while (pos < limit) {
+            // Pack up to 4 input bytes into a 32bit value, MSB first; bytes
+            // missing in the last group count as zero
+            long word = 0;
+            for (int shift = 24; shift >= 0 && pos < limit; shift -= 8) {
+                word |= ((long) (data[pos++] & 0xFF)) << shift;
+            }
+            // Emit the 5 base-85 digits; the least significant digit is
+            // computed first and goes into the last slot of the group
+            int next = written + 5;
+            for (int slot = next - 1; slot >= written; slot--) {
+                encoded[slot] = ENCODE[(int) (word % 85)];
+                word /= 85;
+            }
+            written = next;
+        }
+        return encoded;
+    }
+
+    /**
+     * Decodes all of the Base-85 {@code encoded} data into a byte array of
+     * {@code expectedSize} bytes.
+     *
+     * @param encoded
+     *            Base-85 encoded data
+     * @param expectedSize
+     *            of the result
+     * @return the decoded bytes
+     * @throws IllegalArgumentException
+     *             if expectedSize doesn't match, the encoded data has a
+     *             length that is not a multiple of 5, or there are invalid
+     *             characters in the encoded data
+     */
+    public static byte[] decode(byte[] encoded, int expectedSize) {
+        return decode(encoded, 0, encoded.length, expectedSize);
+    }
+
+    /**
+     * Decodes a slice of Base-85 {@code encoded} data into a byte array of
+     * {@code expectedSize} bytes. The slice consists of {@code length} bytes,
+     * the first one being at index {@code start}.
+     *
+     * @param encoded
+     *            Base-85 encoded data
+     * @param start
+     *            index at which the data to decode starts in {@code encoded}
+     * @param length
+     *            of the Base-85 encoded data
+     * @param expectedSize
+     *            of the result
+     * @return the decoded bytes
+     * @throws IllegalArgumentException
+     *             if expectedSize doesn't match, {@code length} is not a
+     *             multiple of 5, or there are invalid characters in the
+     *             encoded data
+     */
+    public static byte[] decode(byte[] encoded, int start, int length,
+            int expectedSize) {
+        if (length % 5 != 0) {
+            throw new IllegalArgumentException(JGitText.get().base85length);
+        }
+        byte[] decoded = new byte[expectedSize];
+        int limit = start + length;
+        int pos = start;
+        int written = 0;
+        while (pos < limit && written < expectedSize) {
+            // Read one group of 5 digits, most significant digit first
+            long word = 0;
+            for (int digit = 0; digit < 5; digit++) {
+                int symbol = encoded[pos++] & 0xFF;
+                int value = DECODE[symbol];
+                if (value < 0) {
+                    throw new IllegalArgumentException(MessageFormat.format(
+                            JGitText.get().base85invalidChar,
+                            Integer.toHexString(symbol)));
+                }
+                word = word * 85 + value;
+            }
+            // Five base-85 digits can denote values beyond 32 bits
+            if (word > 0xFFFF_FFFFL) {
+                throw new IllegalArgumentException(
+                        MessageFormat.format(JGitText.get().base85overflow,
+                                Long.toHexString(word)));
+            }
+            // Emit up to 4 bytes, MSB first, but never more than
+            // expectedSize bytes in total
+            for (int shift = 24; shift >= 0
+                    && written < expectedSize; shift -= 8) {
+                decoded[written++] = (byte) (word >>> shift);
+            }
+        }
+        // All input must be consumed, and the output must be full
+        if (pos < limit) {
+            throw new IllegalArgumentException(
+                    MessageFormat.format(JGitText.get().base85tooLong,
+                            Integer.valueOf(expectedSize)));
+        }
+        if (written < expectedSize) {
+            throw new IllegalArgumentException(
+                    MessageFormat.format(JGitText.get().base85tooShort,
+                            Integer.valueOf(expectedSize)));
+        }
+        return decoded;
+    }
+}