diff options
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java | 188 |
1 files changed, 158 insertions, 30 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java index 0e8e9b3d84..3ed72516c7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java @@ -13,10 +13,17 @@ package org.eclipse.jgit.util; import static java.nio.charset.StandardCharsets.ISO_8859_1; import static java.nio.charset.StandardCharsets.UTF_8; +import static java.time.Instant.EPOCH; +import static java.time.ZoneOffset.UTC; import static org.eclipse.jgit.lib.ObjectChecker.author; import static org.eclipse.jgit.lib.ObjectChecker.committer; import static org.eclipse.jgit.lib.ObjectChecker.encoding; +import static org.eclipse.jgit.lib.ObjectChecker.object; +import static org.eclipse.jgit.lib.ObjectChecker.parent; +import static org.eclipse.jgit.lib.ObjectChecker.tag; import static org.eclipse.jgit.lib.ObjectChecker.tagger; +import static org.eclipse.jgit.lib.ObjectChecker.tree; +import static org.eclipse.jgit.lib.ObjectChecker.type; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; @@ -25,6 +32,10 @@ import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.UnsupportedCharsetException; +import java.time.DateTimeException; +import java.time.Instant; +import java.time.ZoneId; +import java.time.ZoneOffset; import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -39,14 +50,6 @@ import org.eclipse.jgit.lib.PersonIdent; * Handy utility functions to parse raw object contents. */ public final class RawParseUtils { - /** - * UTF-8 charset constant. 
- * - * @since 2.2 - * @deprecated use {@link java.nio.charset.StandardCharsets#UTF_8} instead - */ - @Deprecated - public static final Charset UTF8_CHARSET = UTF_8; private static final byte[] digits10; @@ -354,6 +357,7 @@ public final class RawParseUtils { * if the string is not hex formatted. * @since 4.3 */ + @SuppressWarnings("IntLongMath") public static final long parseHexInt64(final byte[] bs, final int p) { long r = digits16[bs[p]] << 4; @@ -461,6 +465,29 @@ public final class RawParseUtils { } /** + * Parse a Git style timezone string in [+-]hhmm format + * + * @param b + * buffer to scan. + * @param ptr + * position within buffer to start parsing digits at. + * @param ptrResult + * optional location to return the new ptr value through. If null + * the ptr value will be discarded. + * @return the ZoneOffset representation of the timezone offset string. + * Invalid offsets default to UTC. + */ + private static ZoneId parseZoneOffset(final byte[] b, int ptr, + MutableInteger ptrResult) { + int hhmm = parseBase10(b, ptr, ptrResult); + try { + return ZoneOffset.ofHoursMinutes(hhmm / 100, hhmm % 100); + } catch (DateTimeException e) { + return UTC; + } + } + + /** * Locate the first position after a given character. * * @param b @@ -519,17 +546,24 @@ public final class RawParseUtils { } /** - * Locate the end of the header. Note that headers may be - * more than one line long. + * Locate the first end of line after the given position, while treating + * following lines which are starting with spaces as part of the current + * line. + * <p> + * For example, {@code nextLfSkippingSplitLines( + * "row \n with space at beginning of a following line\nThe actual next line", + * 0)} will return the position of {@code "\nThe actual next line"}. + * * @param b * buffer to scan. * @param ptr - * position within buffer to start looking for the end-of-header. - * @return new position just after the header. 
This is either - * b.length, or the index of the header's terminating newline. - * @since 5.1 + * position within buffer to start looking for the next line. + * @return new position just after the line end of the last line-split. This + * is either b.length, or the index of the current split-line's + * terminating newline. + * @since 6.9 */ - public static final int headerEnd(final byte[] b, int ptr) { + public static final int nextLfSkippingSplitLines(final byte[] b, int ptr) { final int sz = b.length; while (ptr < sz) { final byte c = b[ptr++]; @@ -537,7 +571,62 @@ public final class RawParseUtils { return ptr - 1; } } - return ptr - 1; + return ptr; + } + + /** + * Extract a part of a buffer as a header value, removing the single blanks + * at the front of continuation lines. + * + * @param b + * buffer to extract the header from + * @param start + * of the header value, see + * {@link #headerStart(byte[], byte[], int)} + * @param end + * of the header; see + * {@link #nextLfSkippingSplitLines(byte[], int)} + * @return the header value, with blanks indicating continuation lines + * stripped + * @since 6.9 + */ + public static final byte[] headerValue(final byte[] b, int start, int end) { + byte[] data = new byte[end - start]; + int out = 0; + byte last = '\0'; + for (int in = start; in < end; in++) { + byte ch = b[in]; + if (ch != ' ' || last != '\n') { + data[out++] = ch; + } + last = ch; + } + if (out == data.length) { + return data; + } + return Arrays.copyOf(data, out); + } + + /** + * Locate the first end of header after the given position. Note that + * headers may be more than one line long. + * <p> + * Also note that there might be multiple headers. If you wish to find the + * last header's end - call this in a loop. + * + * @param b + * buffer to scan. + * @param ptr + * position within buffer to start looking for the header + * (normally a new-line). + * @return new position just after the line end. 
This is either b.length, or + * the index of the header's terminating newline. + * @since 5.1 + * @deprecated use {@link #nextLfSkippingSplitLines} directly instead + */ + @Deprecated + public static final int headerEnd(final byte[] b, int ptr) { + return nextLfSkippingSplitLines(b, ptr); } /** @@ -575,6 +664,22 @@ public final class RawParseUtils { } /** + * Returns whether the message starts with any known headers. + * + * @param b + * buffer to scan. + * @return whether the message starts with any known headers + * @since 6.9 + */ + public static final boolean hasAnyKnownHeaders(byte[] b) { + return match(b, 0, tree) != -1 || match(b, 0, parent) != -1 + || match(b, 0, author) != -1 || match(b, 0, committer) != -1 + || match(b, 0, encoding) != -1 || match(b, 0, object) != -1 + || match(b, 0, type) != -1 || match(b, 0, tag) != -1 + || match(b, 0, tagger) != -1; + } + + /** * Locate the first position before a given character. * * @param b @@ -868,6 +973,26 @@ public final class RawParseUtils { } /** + * Parse the "encoding " header into a character set reference. + * <p> + * If unsuccessful, return UTF-8. + * + * @param buffer + * buffer to scan. + * @return the Java character set representation. Never null. Default to + * UTF-8. + * @see #parseEncoding(byte[]) + * @since 6.7 + */ + public static Charset guessEncoding(byte[] buffer) { + try { + return parseEncoding(buffer); + } catch (IllegalCharsetNameException | UnsupportedCharsetException e) { + return UTF_8; + } + } + + /** * Parse a name string (e.g. author, committer, tagger) into a PersonIdent. * <p> * Leading spaces won't be trimmed from the string, i.e. will show up in the @@ -931,17 +1056,19 @@ public final class RawParseUtils { // character if there is no trailing LF. 
final int tzBegin = lastIndexOfTrim(raw, ' ', nextLF(raw, emailE - 1) - 2) + 1; - if (tzBegin <= emailE) // No time/zone, still valid - return new PersonIdent(name, email, 0, 0); + if (tzBegin <= emailE) { // No time/zone, still valid + return new PersonIdent(name, email, EPOCH, UTC); + } final int whenBegin = Math.max(emailE, lastIndexOfTrim(raw, ' ', tzBegin - 1) + 1); - if (whenBegin >= tzBegin - 1) // No time/zone, still valid - return new PersonIdent(name, email, 0, 0); + if (whenBegin >= tzBegin - 1) { // No time/zone, still valid + return new PersonIdent(name, email, EPOCH, UTC); + } - final long when = parseLongBase10(raw, whenBegin, null); - final int tz = parseTimeZoneOffset(raw, tzBegin); - return new PersonIdent(name, email, when * 1000L, tz); + long when = parseLongBase10(raw, whenBegin, null); + return new PersonIdent(name, email, Instant.ofEpochSecond(when), + parseZoneOffset(raw, tzBegin, null)); } /** @@ -979,16 +1106,16 @@ public final class RawParseUtils { name = decode(raw, nameB, stop); final MutableInteger ptrout = new MutableInteger(); - long when; - int tz; + Instant when; + ZoneId tz; if (emailE < stop) { - when = parseLongBase10(raw, emailE + 1, ptrout); - tz = parseTimeZoneOffset(raw, ptrout.value); + when = Instant.ofEpochSecond(parseLongBase10(raw, emailE + 1, ptrout)); + tz = parseZoneOffset(raw, ptrout.value, null); } else { - when = 0; - tz = 0; + when = EPOCH; + tz = UTC; } - return new PersonIdent(name, email, when * 1000L, tz); + return new PersonIdent(name, email, when, tz); } /** @@ -1237,6 +1364,7 @@ public final class RawParseUtils { final int sz = b.length; if (ptr == 0) ptr += 48; // skip the "object ..." line. + // Assume the rest of the current paragraph is all headers. while (ptr < sz && b[ptr] != '\n') ptr = nextLF(b, ptr); if (ptr < sz && b[ptr] == '\n') |