aboutsummaryrefslogtreecommitdiffstats
path: root/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
diff options
context:
space:
mode:
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java')
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java188
1 files changed, 158 insertions, 30 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
index 0e8e9b3d84..3ed72516c7 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
@@ -13,10 +13,17 @@ package org.eclipse.jgit.util;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.time.Instant.EPOCH;
+import static java.time.ZoneOffset.UTC;
import static org.eclipse.jgit.lib.ObjectChecker.author;
import static org.eclipse.jgit.lib.ObjectChecker.committer;
import static org.eclipse.jgit.lib.ObjectChecker.encoding;
+import static org.eclipse.jgit.lib.ObjectChecker.object;
+import static org.eclipse.jgit.lib.ObjectChecker.parent;
+import static org.eclipse.jgit.lib.ObjectChecker.tag;
import static org.eclipse.jgit.lib.ObjectChecker.tagger;
+import static org.eclipse.jgit.lib.ObjectChecker.tree;
+import static org.eclipse.jgit.lib.ObjectChecker.type;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
@@ -25,6 +32,10 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
+import java.time.DateTimeException;
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@@ -39,14 +50,6 @@ import org.eclipse.jgit.lib.PersonIdent;
* Handy utility functions to parse raw object contents.
*/
public final class RawParseUtils {
- /**
- * UTF-8 charset constant.
- *
- * @since 2.2
- * @deprecated use {@link java.nio.charset.StandardCharsets#UTF_8} instead
- */
- @Deprecated
- public static final Charset UTF8_CHARSET = UTF_8;
private static final byte[] digits10;
@@ -354,6 +357,7 @@ public final class RawParseUtils {
* if the string is not hex formatted.
* @since 4.3
*/
+ @SuppressWarnings("IntLongMath")
public static final long parseHexInt64(final byte[] bs, final int p) {
long r = digits16[bs[p]] << 4;
@@ -461,6 +465,29 @@ public final class RawParseUtils {
}
/**
+ * Parse a Git style timezone string in [+-]hhmm format
+ *
+ * @param b
+ * buffer to scan.
+ * @param ptr
+ * position within buffer to start parsing digits at.
+ * @param ptrResult
+ * optional location to return the new ptr value through. If null
+ * the ptr value will be discarded.
+ * @return the ZoneOffset represention of the timezone offset string.
+ * Invalid offsets default to UTC.
+ */
+ private static ZoneId parseZoneOffset(final byte[] b, int ptr,
+ MutableInteger ptrResult) {
+ int hhmm = parseBase10(b, ptr, ptrResult);
+ try {
+ return ZoneOffset.ofHoursMinutes(hhmm / 100, hhmm % 100);
+ } catch (DateTimeException e) {
+ return UTC;
+ }
+ }
+
+ /**
* Locate the first position after a given character.
*
* @param b
@@ -519,17 +546,24 @@ public final class RawParseUtils {
}
/**
- * Locate the end of the header. Note that headers may be
- * more than one line long.
+ * Locate the first end of line after the given position, while treating
+ * following lines which are starting with spaces as part of the current
+ * line.
+ * <p>
+ * For example, {@code nextLfSkippingSplitLines(
+ * "row \n with space at beginning of a following line\nThe actual next line",
+ * 0)} will return the position of {@code "\nThe actual next line"}.
+ *
* @param b
* buffer to scan.
* @param ptr
- * position within buffer to start looking for the end-of-header.
- * @return new position just after the header. This is either
- * b.length, or the index of the header's terminating newline.
- * @since 5.1
+ * position within buffer to start looking for the next line.
+ * @return new position just after the line end of the last line-split. This
+ * is either b.length, or the index of the current split-line's
+ * terminating newline.
+ * @since 6.9
*/
- public static final int headerEnd(final byte[] b, int ptr) {
+ public static final int nextLfSkippingSplitLines(final byte[] b, int ptr) {
final int sz = b.length;
while (ptr < sz) {
final byte c = b[ptr++];
@@ -537,7 +571,62 @@ public final class RawParseUtils {
return ptr - 1;
}
}
- return ptr - 1;
+ return ptr;
+ }
+
+ /**
+ * Extract a part of a buffer as a header value, removing the single blanks
+ * at the front of continuation lines.
+ *
+ * @param b
+ * buffer to extract the header from
+ * @param start
+ * of the header value, see
+ * {@link #headerStart(byte[], byte[], int)}
+ * @param end
+ * of the header; see
+ * {@link #nextLfSkippingSplitLines(byte[], int)}
+ * @return the header value, with blanks indicating continuation lines
+ * stripped
+ * @since 6.9
+ */
+ public static final byte[] headerValue(final byte[] b, int start, int end) {
+ byte[] data = new byte[end - start];
+ int out = 0;
+ byte last = '\0';
+ for (int in = start; in < end; in++) {
+ byte ch = b[in];
+ if (ch != ' ' || last != '\n') {
+ data[out++] = ch;
+ }
+ last = ch;
+ }
+ if (out == data.length) {
+ return data;
+ }
+ return Arrays.copyOf(data, out);
+ }
+
+ /**
+ * Locate the first end of header after the given position. Note that
+ * headers may be more than one line long.
+ * <p>
+ * Also note that there might be multiple headers. If you wish to find the
+ * last header's end - call this in a loop.
+ *
+ * @param b
+ * buffer to scan.
+ * @param ptr
+ * position within buffer to start looking for the header
+ * (normally a new-line).
+ * @return new position just after the line end. This is either b.length, or
+ * the index of the header's terminating newline.
+ * @since 5.1
+ * @deprecated use {{@link #nextLfSkippingSplitLines}} directly instead
+ */
+ @Deprecated
+ public static final int headerEnd(final byte[] b, int ptr) {
+ return nextLfSkippingSplitLines(b, ptr);
}
/**
@@ -575,6 +664,22 @@ public final class RawParseUtils {
}
/**
+ * Returns whether the message starts with any known headers.
+ *
+ * @param b
+ * buffer to scan.
+ * @return whether the message starts with any known headers
+ * @since 6.9
+ */
+ public static final boolean hasAnyKnownHeaders(byte[] b) {
+ return match(b, 0, tree) != -1 || match(b, 0, parent) != -1
+ || match(b, 0, author) != -1 || match(b, 0, committer) != -1
+ || match(b, 0, encoding) != -1 || match(b, 0, object) != -1
+ || match(b, 0, type) != -1 || match(b, 0, tag) != -1
+ || match(b, 0, tagger) != -1;
+ }
+
+ /**
* Locate the first position before a given character.
*
* @param b
@@ -868,6 +973,26 @@ public final class RawParseUtils {
}
/**
+ * Parse the "encoding " header into a character set reference.
+ * <p>
+ * If unsuccessful, return UTF-8.
+ *
+ * @param buffer
+ * buffer to scan.
+ * @return the Java character set representation. Never null. Default to
+ * UTF-8.
+ * @see #parseEncoding(byte[])
+ * @since 6.7
+ */
+ public static Charset guessEncoding(byte[] buffer) {
+ try {
+ return parseEncoding(buffer);
+ } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
+ return UTF_8;
+ }
+ }
+
+ /**
* Parse a name string (e.g. author, committer, tagger) into a PersonIdent.
* <p>
* Leading spaces won't be trimmed from the string, i.e. will show up in the
@@ -931,17 +1056,19 @@ public final class RawParseUtils {
// character if there is no trailing LF.
final int tzBegin = lastIndexOfTrim(raw, ' ',
nextLF(raw, emailE - 1) - 2) + 1;
- if (tzBegin <= emailE) // No time/zone, still valid
- return new PersonIdent(name, email, 0, 0);
+ if (tzBegin <= emailE) { // No time/zone, still valid
+ return new PersonIdent(name, email, EPOCH, UTC);
+ }
final int whenBegin = Math.max(emailE,
lastIndexOfTrim(raw, ' ', tzBegin - 1) + 1);
- if (whenBegin >= tzBegin - 1) // No time/zone, still valid
- return new PersonIdent(name, email, 0, 0);
+ if (whenBegin >= tzBegin - 1) { // No time/zone, still valid
+ return new PersonIdent(name, email, EPOCH, UTC);
+ }
- final long when = parseLongBase10(raw, whenBegin, null);
- final int tz = parseTimeZoneOffset(raw, tzBegin);
- return new PersonIdent(name, email, when * 1000L, tz);
+ long when = parseLongBase10(raw, whenBegin, null);
+ return new PersonIdent(name, email, Instant.ofEpochSecond(when),
+ parseZoneOffset(raw, tzBegin, null));
}
/**
@@ -979,16 +1106,16 @@ public final class RawParseUtils {
name = decode(raw, nameB, stop);
final MutableInteger ptrout = new MutableInteger();
- long when;
- int tz;
+ Instant when;
+ ZoneId tz;
if (emailE < stop) {
- when = parseLongBase10(raw, emailE + 1, ptrout);
- tz = parseTimeZoneOffset(raw, ptrout.value);
+ when = Instant.ofEpochSecond(parseLongBase10(raw, emailE + 1, ptrout));
+ tz = parseZoneOffset(raw, ptrout.value, null);
} else {
- when = 0;
- tz = 0;
+ when = EPOCH;
+ tz = UTC;
}
- return new PersonIdent(name, email, when * 1000L, tz);
+ return new PersonIdent(name, email, when, tz);
}
/**
@@ -1237,6 +1364,7 @@ public final class RawParseUtils {
final int sz = b.length;
if (ptr == 0)
ptr += 48; // skip the "object ..." line.
+ // Assume the rest of the current paragraph is all headers.
while (ptr < sz && b[ptr] != '\n')
ptr = nextLF(b, ptr);
if (ptr < sz && b[ptr] == '\n')