1 files changed, 158 insertions, 30 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
index 0e8e9b3d84..3ed72516c7 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
@@ -13,10 +13,17 @@ package org.eclipse.jgit.util;
 
 import static java.nio.charset.StandardCharsets.ISO_8859_1;
 import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.time.Instant.EPOCH;
+import static java.time.ZoneOffset.UTC;
 import static org.eclipse.jgit.lib.ObjectChecker.author;
 import static org.eclipse.jgit.lib.ObjectChecker.committer;
 import static org.eclipse.jgit.lib.ObjectChecker.encoding;
+import static org.eclipse.jgit.lib.ObjectChecker.object;
+import static org.eclipse.jgit.lib.ObjectChecker.parent;
+import static org.eclipse.jgit.lib.ObjectChecker.tag;
 import static org.eclipse.jgit.lib.ObjectChecker.tagger;
+import static org.eclipse.jgit.lib.ObjectChecker.tree;
+import static org.eclipse.jgit.lib.ObjectChecker.type;
 
 import java.nio.ByteBuffer;
 import java.nio.charset.CharacterCodingException;
@@ -25,6 +32,10 @@ import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;
+import java.time.DateTimeException;
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
@@ -39,14 +50,6 @@ import org.eclipse.jgit.lib.PersonIdent;
  * Handy utility functions to parse raw object contents.
  */
 public final class RawParseUtils {
-	/**
-	 * UTF-8 charset constant.
-	 *
-	 * @since 2.2
-	 * @deprecated use {@link java.nio.charset.StandardCharsets#UTF_8} instead
-	 */
-	@Deprecated
-	public static final Charset UTF8_CHARSET = UTF_8;
 
 	private static final byte[] digits10;
 
@@ -354,6 +357,7 @@ public final class RawParseUtils {
 	 *             if the string is not hex formatted.
 	 * @since 4.3
 	 */
+	@SuppressWarnings("IntLongMath")
 	public static final long parseHexInt64(final byte[] bs, final int p) {
 		long r = digits16[bs[p]] << 4;
 
@@ -461,6 +465,29 @@ public final class RawParseUtils {
 	}
 
 	/**
+	 * Parse a Git style timezone string in [+-]hhmm format
+	 *
+	 * @param b
+	 *            buffer to scan.
+	 * @param ptr
+	 *            position within buffer to start parsing digits at.
+	 * @param ptrResult
+	 *            optional location to return the new ptr value through. If null
+	 *            the ptr value will be discarded.
+	 * @return the ZoneOffset represention of the timezone offset string.
+	 *         Invalid offsets default to UTC.
+	 */
+	private static ZoneId parseZoneOffset(final byte[] b, int ptr,
+			MutableInteger ptrResult) {
+		int hhmm = parseBase10(b, ptr, ptrResult);
+		try {
+			return ZoneOffset.ofHoursMinutes(hhmm / 100, hhmm % 100);
+		} catch (DateTimeException e) {
+			return UTC;
+		}
+	}
+
+	/**
 	 * Locate the first position after a given character.
 	 *
 	 * @param b
@@ -519,17 +546,24 @@ public final class RawParseUtils {
 	}
 
 	/**
-	 * Locate the end of the header.  Note that headers may be
-	 * more than one line long.
+	 * Locate the first end of line after the given position, while treating
+	 * following lines which are starting with spaces as part of the current
+	 * line.
+	 * <p>
+	 * For example, {@code nextLfSkippingSplitLines(
+	 * "row \n with space at beginning of a following line\nThe actual next line",
+	 * 0)} will return the position of {@code "\nThe actual next line"}.
+	 *
 	 * @param b
 	 *            buffer to scan.
 	 * @param ptr
-	 *            position within buffer to start looking for the end-of-header.
-	 * @return new position just after the header.  This is either
-	 * b.length, or the index of the header's terminating newline.
-	 * @since 5.1
+	 *            position within buffer to start looking for the next line.
+	 * @return new position just after the line end of the last line-split. This
+	 *         is either b.length, or the index of the current split-line's
+	 *         terminating newline.
+	 * @since 6.9
 	 */
-	public static final int headerEnd(final byte[] b, int ptr) {
+	public static final int nextLfSkippingSplitLines(final byte[] b, int ptr) {
 		final int sz = b.length;
 		while (ptr < sz) {
 			final byte c = b[ptr++];
@@ -537,7 +571,62 @@ public final class RawParseUtils {
 				return ptr - 1;
 			}
 		}
-		return ptr - 1;
+		return ptr;
+	}
+
+	/**
+	 * Extract a part of a buffer as a header value, removing the single blanks
+	 * at the front of continuation lines.
+	 *
+	 * @param b
+	 *            buffer to extract the header from
+	 * @param start
+	 *            of the header value, see
+	 *            {@link #headerStart(byte[], byte[], int)}
+	 * @param end
+	 *            of the header; see
+	 *            {@link #nextLfSkippingSplitLines(byte[], int)}
+	 * @return the header value, with blanks indicating continuation lines
+	 *         stripped
+	 * @since 6.9
+	 */
+	public static final byte[] headerValue(final byte[] b, int start, int end) {
+		byte[] data = new byte[end - start];
+		int out = 0;
+		byte last = '\0';
+		for (int in = start; in < end; in++) {
+			byte ch = b[in];
+			if (ch != ' ' || last != '\n') {
+				data[out++] = ch;
+			}
+			last = ch;
+		}
+		if (out == data.length) {
+			return data;
+		}
+		return Arrays.copyOf(data, out);
+	}
+
+	/**
+	 * Locate the first end of header after the given position. Note that
+	 * headers may be more than one line long.
+	 * <p>
+	 * Also note that there might be multiple headers. If you wish to find the
+	 * last header's end - call this in a loop.
+	 *
+	 * @param b
+	 *            buffer to scan.
+	 * @param ptr
+	 *            position within buffer to start looking for the header
+	 *            (normally a new-line).
+	 * @return new position just after the line end. This is either b.length, or
+	 *         the index of the header's terminating newline.
+	 * @since 5.1
+	 * @deprecated use {{@link #nextLfSkippingSplitLines}} directly instead
+	 */
+	@Deprecated
+	public static final int headerEnd(final byte[] b, int ptr) {
+		return nextLfSkippingSplitLines(b, ptr);
 	}
 
 	/**
@@ -575,6 +664,22 @@ public final class RawParseUtils {
 	}
 
 	/**
+	 * Returns whether the message starts with any known headers.
+	 *
+	 * @param b
+	 *            buffer to scan.
+	 * @return whether the message starts with any known headers
+	 * @since 6.9
+	 */
+	public static final boolean hasAnyKnownHeaders(byte[] b) {
+		return match(b, 0, tree) != -1 || match(b, 0, parent) != -1
+				|| match(b, 0, author) != -1 || match(b, 0, committer) != -1
+				|| match(b, 0, encoding) != -1 || match(b, 0, object) != -1
+				|| match(b, 0, type) != -1 || match(b, 0, tag) != -1
+				|| match(b, 0, tagger) != -1;
+	}
+
+	/**
 	 * Locate the first position before a given character.
 	 *
 	 * @param b
@@ -868,6 +973,26 @@ public final class RawParseUtils {
 	}
 
 	/**
+	 * Parse the "encoding " header into a character set reference.
+	 * <p>
+	 * If unsuccessful, return UTF-8.
+	 *
+	 * @param buffer
+	 *            buffer to scan.
+	 * @return the Java character set representation. Never null. Default to
+	 *         UTF-8.
+	 * @see #parseEncoding(byte[])
+	 * @since 6.7
+	 */
+	public static Charset guessEncoding(byte[] buffer) {
+		try {
+			return parseEncoding(buffer);
+		} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
+			return UTF_8;
+		}
+	}
+
+	/**
 	 * Parse a name string (e.g. author, committer, tagger) into a PersonIdent.
 	 * <p>
 	 * Leading spaces won't be trimmed from the string, i.e. will show up in the
@@ -931,17 +1056,19 @@ public final class RawParseUtils {
 		// character if there is no trailing LF.
 		final int tzBegin = lastIndexOfTrim(raw, ' ',
 				nextLF(raw, emailE - 1) - 2) + 1;
-		if (tzBegin <= emailE) // No time/zone, still valid
-			return new PersonIdent(name, email, 0, 0);
+		if (tzBegin <= emailE) { // No time/zone, still valid
+			return new PersonIdent(name, email, EPOCH, UTC);
+		}
 
 		final int whenBegin = Math.max(emailE,
 				lastIndexOfTrim(raw, ' ', tzBegin - 1) + 1);
-		if (whenBegin >= tzBegin - 1) // No time/zone, still valid
-			return new PersonIdent(name, email, 0, 0);
+		if (whenBegin >= tzBegin - 1) { // No time/zone, still valid
+			return new PersonIdent(name, email, EPOCH, UTC);
+		}
 
-		final long when = parseLongBase10(raw, whenBegin, null);
-		final int tz = parseTimeZoneOffset(raw, tzBegin);
-		return new PersonIdent(name, email, when * 1000L, tz);
+		long when = parseLongBase10(raw, whenBegin, null);
+		return new PersonIdent(name, email, Instant.ofEpochSecond(when),
+				parseZoneOffset(raw, tzBegin, null));
 	}
 
 	/**
@@ -979,16 +1106,16 @@ public final class RawParseUtils {
 			name = decode(raw, nameB, stop);
 
 		final MutableInteger ptrout = new MutableInteger();
-		long when;
-		int tz;
+		Instant when;
+		ZoneId tz;
 		if (emailE < stop) {
-			when = parseLongBase10(raw, emailE + 1, ptrout);
-			tz = parseTimeZoneOffset(raw, ptrout.value);
+			when = Instant.ofEpochSecond(parseLongBase10(raw, emailE + 1, ptrout));
+			tz = parseZoneOffset(raw, ptrout.value, null);
 		} else {
-			when = 0;
-			tz = 0;
+			when = EPOCH;
+			tz = UTC;
 		}
-		return new PersonIdent(name, email, when * 1000L, tz);
+		return new PersonIdent(name, email, when, tz);
 	}
 
 	/**
@@ -1237,6 +1364,7 @@ public final class RawParseUtils {
 		final int sz = b.length;
 		if (ptr == 0)
 			ptr += 48; // skip the "object ..." line.
+		// Assume the rest of the current paragraph is all headers.
 		while (ptr < sz && b[ptr] != '\n')
 			ptr = nextLF(b, ptr);
 		if (ptr < sz && b[ptr] == '\n')