aboutsummaryrefslogtreecommitdiffstats
path: root/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
diff options
context:
space:
mode:
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java')
-rw-r--r-- org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java 573
1 files changed, 451 insertions, 122 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
index 3c2460cad7..3ed72516c7 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
@@ -1,53 +1,29 @@
/*
* Copyright (C) 2008-2009, Google Inc.
- * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org>
- * and other copyright owners as documented in the project's IP log.
+ * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org> and others
*
- * This program and the accompanying materials are made available
- * under the terms of the Eclipse Distribution License v1.0 which
- * accompanies this distribution, is reproduced below, and is
- * available at http://www.eclipse.org/org/documents/edl-v10.php
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
*
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * - Neither the name of the Eclipse Foundation, Inc. nor the
- * names of its contributors may be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.util;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.time.Instant.EPOCH;
+import static java.time.ZoneOffset.UTC;
import static org.eclipse.jgit.lib.ObjectChecker.author;
import static org.eclipse.jgit.lib.ObjectChecker.committer;
import static org.eclipse.jgit.lib.ObjectChecker.encoding;
+import static org.eclipse.jgit.lib.ObjectChecker.object;
+import static org.eclipse.jgit.lib.ObjectChecker.parent;
+import static org.eclipse.jgit.lib.ObjectChecker.tag;
import static org.eclipse.jgit.lib.ObjectChecker.tagger;
+import static org.eclipse.jgit.lib.ObjectChecker.tree;
+import static org.eclipse.jgit.lib.ObjectChecker.type;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
@@ -56,21 +32,24 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
+import java.time.DateTimeException;
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
+import org.eclipse.jgit.annotations.Nullable;
+import org.eclipse.jgit.diff.RawText;
+import org.eclipse.jgit.errors.BinaryBlobException;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.PersonIdent;
-/** Handy utility functions to parse raw object contents. */
+/**
+ * Handy utility functions to parse raw object contents.
+ */
public final class RawParseUtils {
- /**
- * UTF-8 charset constant.
- *
- * @since 2.2
- */
- public static final Charset UTF8_CHARSET = Charset.forName("UTF-8"); //$NON-NLS-1$
private static final byte[] digits10;
@@ -81,8 +60,9 @@ public final class RawParseUtils {
private static final Map<String, Charset> encodingAliases;
static {
- encodingAliases = new HashMap<String, Charset>();
- encodingAliases.put("latin-1", Charset.forName("ISO-8859-1")); //$NON-NLS-1$ //$NON-NLS-2$
+ encodingAliases = new HashMap<>();
+ encodingAliases.put("latin-1", ISO_8859_1); //$NON-NLS-1$
+ encodingAliases.put("iso-latin-1", ISO_8859_1); //$NON-NLS-1$
digits10 = new byte['9' + 1];
Arrays.fill(digits10, (byte) -1);
@@ -119,7 +99,7 @@ public final class RawParseUtils {
* the buffer to test for equality with b.
* @return ptr + src.length if b[ptr..src.length] == src; else -1.
*/
- public static final int match(final byte[] b, int ptr, final byte[] src) {
+ public static final int match(byte[] b, int ptr, byte[] src) {
if (ptr + src.length > b.length)
return -1;
for (int i = 0; i < src.length; i++, ptr++)
@@ -302,7 +282,7 @@ public final class RawParseUtils {
* @param p
* first position within the buffer to parse.
* @return the integer value.
- * @throws ArrayIndexOutOfBoundsException
+ * @throws java.lang.ArrayIndexOutOfBoundsException
* if the string is not hex formatted.
*/
public static final int parseHexInt16(final byte[] bs, final int p) {
@@ -332,7 +312,7 @@ public final class RawParseUtils {
* @param p
* first position within the buffer to parse.
* @return the integer value.
- * @throws ArrayIndexOutOfBoundsException
+ * @throws java.lang.ArrayIndexOutOfBoundsException
* if the string is not hex formatted.
*/
public static final int parseHexInt32(final byte[] bs, final int p) {
@@ -362,12 +342,79 @@ public final class RawParseUtils {
}
/**
+ * Parse 16 character base 16 (hex) formatted string to unsigned long.
+ * <p>
+ * The number is read in network byte order, that is, most significant
+ * nibble first.
+ *
+ * @param bs
+ * buffer to parse digits from; positions {@code [p, p+16)} will
+ * be parsed.
+ * @param p
+ * first position within the buffer to parse.
+ * @return the integer value.
+ * @throws java.lang.ArrayIndexOutOfBoundsException
+ * if the string is not hex formatted.
+ * @since 4.3
+ */
+ @SuppressWarnings("IntLongMath")
+ public static final long parseHexInt64(final byte[] bs, final int p) {
+ long r = digits16[bs[p]] << 4;
+
+ r |= digits16[bs[p + 1]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 2]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 3]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 4]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 5]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 6]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 7]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 8]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 9]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 10]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 11]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 12]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 13]];
+ r <<= 4;
+
+ r |= digits16[bs[p + 14]];
+
+ final int last = digits16[bs[p + 15]];
+ if (r < 0 || last < 0)
+ throw new ArrayIndexOutOfBoundsException();
+ return (r << 4) | last;
+ }
+
+ /**
* Parse a single hex digit to its numeric value (0-15).
*
* @param digit
* hex character to parse.
* @return numeric value, in the range 0-15.
- * @throws ArrayIndexOutOfBoundsException
+ * @throws java.lang.ArrayIndexOutOfBoundsException
* if the input digit is not a valid hex digit.
*/
public static final int parseHexInt4(final byte digit) {
@@ -389,14 +436,58 @@ public final class RawParseUtils {
* position within buffer to start parsing digits at.
* @return the timezone at this location, expressed in minutes.
*/
- public static final int parseTimeZoneOffset(final byte[] b, int ptr) {
- final int v = parseBase10(b, ptr, null);
+ public static final int parseTimeZoneOffset(byte[] b, int ptr) {
+ return parseTimeZoneOffset(b, ptr, null);
+ }
+
+ /**
+ * Parse a Git style timezone string.
+ * <p>
+ * The sequence "-0315" will be parsed as the numeric value -195, as the
+ * lower two positions count minutes, not 100ths of an hour.
+ *
+ * @param b
+ * buffer to scan.
+ * @param ptr
+ * position within buffer to start parsing digits at.
+ * @param ptrResult
+ * optional location to return the new ptr value through. If null
+ * the ptr value will be discarded.
+ * @return the timezone at this location, expressed in minutes.
+ * @since 4.1
+ */
+ public static final int parseTimeZoneOffset(final byte[] b, int ptr,
+ MutableInteger ptrResult) {
+ final int v = parseBase10(b, ptr, ptrResult);
final int tzMins = v % 100;
final int tzHours = v / 100;
return tzHours * 60 + tzMins;
}
/**
+ * Parse a Git style timezone string in [+-]hhmm format.
+ *
+ * @param b
+ * buffer to scan.
+ * @param ptr
+ * position within buffer to start parsing digits at.
+ * @param ptrResult
+ * optional location to return the new ptr value through. If null
+ * the ptr value will be discarded.
+ * @return the ZoneOffset representation of the timezone offset string.
+ * Invalid offsets default to UTC.
+ */
+ private static ZoneId parseZoneOffset(final byte[] b, int ptr,
+ MutableInteger ptrResult) {
+ int hhmm = parseBase10(b, ptr, ptrResult);
+ try {
+ return ZoneOffset.ofHoursMinutes(hhmm / 100, hhmm % 100);
+ } catch (DateTimeException e) {
+ return UTC;
+ }
+ }
+
+ /**
* Locate the first position after a given character.
*
* @param b
@@ -407,7 +498,7 @@ public final class RawParseUtils {
* character to find.
* @return new position just after chrA.
*/
- public static final int next(final byte[] b, int ptr, final char chrA) {
+ public static final int next(byte[] b, int ptr, char chrA) {
final int sz = b.length;
while (ptr < sz) {
if (b[ptr++] == chrA)
@@ -427,7 +518,7 @@ public final class RawParseUtils {
* position within buffer to start looking for LF at.
* @return new position just after the first LF found.
*/
- public static final int nextLF(final byte[] b, int ptr) {
+ public static final int nextLF(byte[] b, int ptr) {
return next(b, ptr, '\n');
}
@@ -444,7 +535,7 @@ public final class RawParseUtils {
* character to find.
* @return new position just after the first chrA or LF to be found.
*/
- public static final int nextLF(final byte[] b, int ptr, final char chrA) {
+ public static final int nextLF(byte[] b, int ptr, char chrA) {
final int sz = b.length;
while (ptr < sz) {
final byte c = b[ptr++];
@@ -455,6 +546,140 @@ public final class RawParseUtils {
}
/**
+ * Locate the first end of line after the given position, while treating
+ * following lines which are starting with spaces as part of the current
+ * line.
+ * <p>
+ * For example, {@code nextLfSkippingSplitLines(
+ * "row \n with space at beginning of a following line\nThe actual next line",
+ * 0)} will return the position of {@code "\nThe actual next line"}.
+ *
+ * @param b
+ * buffer to scan.
+ * @param ptr
+ * position within buffer to start looking for the next line.
+ * @return position of the line end of the last line-split. This is either
+ * b.length, or the index of the current split-line's terminating
+ * newline (the newline itself is not skipped).
+ * @since 6.9
+ */
+ public static final int nextLfSkippingSplitLines(final byte[] b, int ptr) {
+ final int sz = b.length;
+ while (ptr < sz) {
+ final byte c = b[ptr++];
+ if (c == '\n' && (ptr == sz || b[ptr] != ' ')) {
+ return ptr - 1;
+ }
+ }
+ return ptr;
+ }
+
+ /**
+ * Extract a part of a buffer as a header value, removing the single blanks
+ * at the front of continuation lines.
+ *
+ * @param b
+ * buffer to extract the header from
+ * @param start
+ * of the header value, see
+ * {@link #headerStart(byte[], byte[], int)}
+ * @param end
+ * of the header; see
+ * {@link #nextLfSkippingSplitLines(byte[], int)}
+ * @return the header value, with blanks indicating continuation lines
+ * stripped
+ * @since 6.9
+ */
+ public static final byte[] headerValue(final byte[] b, int start, int end) {
+ byte[] data = new byte[end - start];
+ int out = 0;
+ byte last = '\0';
+ for (int in = start; in < end; in++) {
+ byte ch = b[in];
+ if (ch != ' ' || last != '\n') {
+ data[out++] = ch;
+ }
+ last = ch;
+ }
+ if (out == data.length) {
+ return data;
+ }
+ return Arrays.copyOf(data, out);
+ }
+
+ /**
+ * Locate the first end of header after the given position. Note that
+ * headers may be more than one line long.
+ * <p>
+ * Also note that there might be multiple headers. If you wish to find the
+ * last header's end - call this in a loop.
+ *
+ * @param b
+ * buffer to scan.
+ * @param ptr
+ * position within buffer to start looking for the header
+ * (normally a new-line).
+ * @return position of the header's end. This is either b.length, or the
+ * index of the header's terminating newline.
+ * @since 5.1
+ * @deprecated use {@link #nextLfSkippingSplitLines(byte[], int)} directly instead
+ */
+ @Deprecated
+ public static final int headerEnd(final byte[] b, int ptr) {
+ return nextLfSkippingSplitLines(b, ptr);
+ }
+
+ /**
+ * Find the start of the contents of a given header.
+ *
+ * @param b
+ * buffer to scan.
+ * @param headerName
+ * header to search for
+ * @param ptr
+ * position within buffer to start looking for header at.
+ * @return new position at the start of the header's contents, -1 for
+ * not found
+ * @since 5.1
+ */
+ public static final int headerStart(byte[] headerName, byte[] b, int ptr) {
+ // Start by advancing to just past a LF or buffer start
+ if (ptr != 0) {
+ ptr = nextLF(b, ptr - 1);
+ }
+ while (ptr < b.length - (headerName.length + 1)) {
+ boolean found = true;
+ for (byte element : headerName) {
+ if (element != b[ptr++]) {
+ found = false;
+ break;
+ }
+ }
+ if (found && b[ptr++] == ' ') {
+ return ptr;
+ }
+ ptr = nextLF(b, ptr);
+ }
+ return -1;
+ }
+
+ /**
+ * Returns whether the message starts with any known headers.
+ *
+ * @param b
+ * buffer to scan.
+ * @return whether the message starts with any known headers
+ * @since 6.9
+ */
+ public static final boolean hasAnyKnownHeaders(byte[] b) {
+ return match(b, 0, tree) != -1 || match(b, 0, parent) != -1
+ || match(b, 0, author) != -1 || match(b, 0, committer) != -1
+ || match(b, 0, encoding) != -1 || match(b, 0, object) != -1
+ || match(b, 0, type) != -1 || match(b, 0, tag) != -1
+ || match(b, 0, tagger) != -1;
+ }
+
+ /**
* Locate the first position before a given character.
*
* @param b
@@ -465,7 +690,7 @@ public final class RawParseUtils {
* character to find.
* @return new position just before chrA, -1 for not found
*/
- public static final int prev(final byte[] b, int ptr, final char chrA) {
+ public static final int prev(byte[] b, int ptr, char chrA) {
if (ptr == b.length)
--ptr;
while (ptr >= 0) {
@@ -486,7 +711,7 @@ public final class RawParseUtils {
* position within buffer to start looking for LF at.
* @return new position just before the first LF found, -1 for not found
*/
- public static final int prevLF(final byte[] b, int ptr) {
+ public static final int prevLF(byte[] b, int ptr) {
return prev(b, ptr, '\n');
}
@@ -504,7 +729,7 @@ public final class RawParseUtils {
* @return new position just before the first chrA or LF to be found, -1 for
* not found
*/
- public static final int prevLF(final byte[] b, int ptr, final char chrA) {
+ public static final int prevLF(byte[] b, int ptr, char chrA) {
if (ptr == b.length)
--ptr;
while (ptr >= 0) {
@@ -519,7 +744,7 @@ public final class RawParseUtils {
* Index the region between <code>[ptr, end)</code> to find line starts.
* <p>
* The returned list is 1 indexed. Index 0 contains
- * {@link Integer#MIN_VALUE} to pad the list out.
+ * {@link java.lang.Integer#MIN_VALUE} to pad the list out.
* <p>
* Using a 1 indexed list means that line numbers can be directly accessed
* from the list, so <code>list.get(1)</code> (aka get line 1) returns
@@ -535,17 +760,56 @@ public final class RawParseUtils {
* line 1.
* @param end
* 1 past the end of the content within <code>buf</code>.
- * @return a line map indexing the start position of each line.
+ * @return a line map indicating the starting position of each line.
*/
- public static final IntList lineMap(final byte[] buf, int ptr, int end) {
+ public static final IntList lineMap(byte[] buf, int ptr, int end) {
+ IntList map = new IntList((end - ptr) / 36);
+ map.fillTo(1, Integer.MIN_VALUE);
+ for (; ptr < end; ptr = nextLF(buf, ptr)) {
+ map.add(ptr);
+ }
+ map.add(end);
+ return map;
+ }
+
+ /**
+ * Like {@link #lineMap(byte[], int, int)} but throw
+ * {@link BinaryBlobException} if a NUL byte or a lone CR is encountered.
+ *
+ * @param buf
+ * buffer to scan.
+ * @param ptr
+ * position within the buffer corresponding to the first byte of
+ * line 1.
+ * @param end
+ * 1 past the end of the content within <code>buf</code>.
+ * @return a line map indicating the starting position of each line.
+ * @throws BinaryBlobException
+ * if a NUL byte or a lone CR is found.
+ * @since 5.0
+ */
+ public static final IntList lineMapOrBinary(byte[] buf, int ptr, int end)
+ throws BinaryBlobException {
// Experimentally derived from multiple source repositories
// the average number of bytes/line is 36. Its a rough guess
// to initially size our map close to the target.
- //
- final IntList map = new IntList((end - ptr) / 36);
- map.fillTo(1, Integer.MIN_VALUE);
- for (; ptr < end; ptr = nextLF(buf, ptr))
- map.add(ptr);
+ IntList map = new IntList((end - ptr) / 36);
+ map.add(Integer.MIN_VALUE);
+ byte last = '\n'; // Must be \n to add the initial ptr
+ for (; ptr < end; ptr++) {
+ if (last == '\n') {
+ map.add(ptr);
+ }
+ byte curr = buf[ptr];
+ if (RawText.isBinary(curr, last)) {
+ throw new BinaryBlobException();
+ }
+ last = curr;
+ }
+ if (last == '\r') {
+ // Counts as binary
+ throw new BinaryBlobException();
+ }
map.add(end);
return map;
}
@@ -563,7 +827,7 @@ public final class RawParseUtils {
* character of the author's name. If no author header can be
* located -1 is returned.
*/
- public static final int author(final byte[] b, int ptr) {
+ public static final int author(byte[] b, int ptr) {
final int sz = b.length;
if (ptr == 0)
ptr += 46; // skip the "tree ..." line.
@@ -585,7 +849,7 @@ public final class RawParseUtils {
* character of the committer's name. If no committer header can be
* located -1 is returned.
*/
- public static final int committer(final byte[] b, int ptr) {
+ public static final int committer(byte[] b, int ptr) {
final int sz = b.length;
if (ptr == 0)
ptr += 46; // skip the "tree ..." line.
@@ -609,7 +873,7 @@ public final class RawParseUtils {
* character of the tagger's name. If no tagger header can be
* located -1 is returned.
*/
- public static final int tagger(final byte[] b, int ptr) {
+ public static final int tagger(byte[] b, int ptr) {
final int sz = b.length;
if (ptr == 0)
ptr += 48; // skip the "object ..." line.
@@ -637,7 +901,7 @@ public final class RawParseUtils {
* character of the encoding's name. If no encoding header can be
* located -1 is returned (and UTF-8 should be assumed).
*/
- public static final int encoding(final byte[] b, int ptr) {
+ public static final int encoding(byte[] b, int ptr) {
final int sz = b.length;
while (ptr < sz) {
if (b[ptr] == '\n')
@@ -650,40 +914,85 @@ public final class RawParseUtils {
}
/**
+ * Parse the "encoding " header as a string.
+ * <p>
+ * Locates the "encoding " header (if present) and returns its value.
+ *
+ * @param b
+ * buffer to scan.
+ * @return the encoding header as specified in the commit; null if the
+ * header was not present and the default (UTF-8) should be assumed.
+ * @since 4.2
+ */
+ @Nullable
+ public static String parseEncodingName(byte[] b) {
+ int enc = encoding(b, 0);
+ if (enc < 0) {
+ return null;
+ }
+ int lf = nextLF(b, enc);
+ return decode(UTF_8, b, enc, lf - 1);
+ }
+
+ /**
* Parse the "encoding " header into a character set reference.
* <p>
* Locates the "encoding " header (if present) by first calling
* {@link #encoding(byte[], int)} and then returns the proper character set
* to apply to this buffer to evaluate its contents as character data.
* <p>
- * If no encoding header is present, {@link Constants#CHARSET} is assumed.
+ * If no encoding header is present {@code UTF-8} is assumed.
*
* @param b
* buffer to scan.
* @return the Java character set representation. Never null.
+ * @throws IllegalCharsetNameException
+ * if the character set requested by the encoding header is
+ * malformed and unsupportable.
+ * @throws UnsupportedCharsetException
+ * if the JRE does not support the character set requested by
+ * the encoding header.
*/
- public static Charset parseEncoding(final byte[] b) {
- final int enc = encoding(b, 0);
- if (enc < 0)
- return Constants.CHARSET;
- final int lf = nextLF(b, enc);
- String decoded = decode(Constants.CHARSET, b, enc, lf - 1);
+ public static Charset parseEncoding(byte[] b) {
+ String enc = parseEncodingName(b);
+ if (enc == null) {
+ return UTF_8;
+ }
+
+ String name = enc.trim();
try {
- return Charset.forName(decoded);
- } catch (IllegalCharsetNameException badName) {
- Charset aliased = charsetForAlias(decoded);
- if (aliased != null)
- return aliased;
- throw badName;
- } catch (UnsupportedCharsetException badName) {
- Charset aliased = charsetForAlias(decoded);
- if (aliased != null)
+ return Charset.forName(name);
+ } catch (IllegalCharsetNameException
+ | UnsupportedCharsetException badName) {
+ Charset aliased = charsetForAlias(name);
+ if (aliased != null) {
return aliased;
+ }
throw badName;
}
}
/**
+ * Parse the "encoding " header into a character set reference.
+ * <p>
+ * If unsuccessful, return UTF-8.
+ *
+ * @param buffer
+ * buffer to scan.
+ * @return the Java character set representation. Never null. Default to
+ * UTF-8.
+ * @see #parseEncoding(byte[])
+ * @since 6.7
+ */
+ public static Charset guessEncoding(byte[] buffer) {
+ try {
+ return parseEncoding(buffer);
+ } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
+ return UTF_8;
+ }
+ }
+
+ /**
* Parse a name string (e.g. author, committer, tagger) into a PersonIdent.
* <p>
* Leading spaces won't be trimmed from the string, i.e. will show up in the
@@ -694,7 +1003,7 @@ public final class RawParseUtils {
* @return the parsed identity or null in case the identity could not be
* parsed.
*/
- public static PersonIdent parsePersonIdent(final String in) {
+ public static PersonIdent parsePersonIdent(String in) {
return parsePersonIdent(Constants.encode(in), 0);
}
@@ -716,8 +1025,16 @@ public final class RawParseUtils {
* @return the parsed identity or null in case the identity could not be
* parsed.
*/
- public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
- final Charset cs = parseEncoding(raw);
+ public static PersonIdent parsePersonIdent(byte[] raw, int nameB) {
+ Charset cs;
+ try {
+ cs = parseEncoding(raw);
+ } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
+ // Assume UTF-8 for person identities, usually this is correct.
+ // If not decode() will fall back to the ISO-8859-1 encoding.
+ cs = UTF_8;
+ }
+
final int emailB = nextLF(raw, nameB, '<');
final int emailE = nextLF(raw, emailB, '>');
if (emailB >= raw.length || raw[emailB] == '\n' ||
@@ -739,17 +1056,19 @@ public final class RawParseUtils {
// character if there is no trailing LF.
final int tzBegin = lastIndexOfTrim(raw, ' ',
nextLF(raw, emailE - 1) - 2) + 1;
- if (tzBegin <= emailE) // No time/zone, still valid
- return new PersonIdent(name, email, 0, 0);
+ if (tzBegin <= emailE) { // No time/zone, still valid
+ return new PersonIdent(name, email, EPOCH, UTC);
+ }
final int whenBegin = Math.max(emailE,
lastIndexOfTrim(raw, ' ', tzBegin - 1) + 1);
- if (whenBegin >= tzBegin - 1) // No time/zone, still valid
- return new PersonIdent(name, email, 0, 0);
+ if (whenBegin >= tzBegin - 1) { // No time/zone, still valid
+ return new PersonIdent(name, email, EPOCH, UTC);
+ }
- final long when = parseLongBase10(raw, whenBegin, null);
- final int tz = parseTimeZoneOffset(raw, tzBegin);
- return new PersonIdent(name, email, when * 1000L, tz);
+ long when = parseLongBase10(raw, whenBegin, null);
+ return new PersonIdent(name, email, Instant.ofEpochSecond(when),
+ parseZoneOffset(raw, tzBegin, null));
}
/**
@@ -787,16 +1106,16 @@ public final class RawParseUtils {
name = decode(raw, nameB, stop);
final MutableInteger ptrout = new MutableInteger();
- long when;
- int tz;
+ Instant when;
+ ZoneId tz;
if (emailE < stop) {
- when = parseLongBase10(raw, emailE + 1, ptrout);
- tz = parseTimeZoneOffset(raw, ptrout.value);
+ when = Instant.ofEpochSecond(parseLongBase10(raw, emailE + 1, ptrout));
+ tz = parseZoneOffset(raw, ptrout.value, null);
} else {
- when = 0;
- tz = 0;
+ when = EPOCH;
+ tz = UTC;
}
- return new PersonIdent(name, email, when * 1000L, tz);
+ return new PersonIdent(name, email, when, tz);
}
/**
@@ -816,7 +1135,7 @@ public final class RawParseUtils {
* @return position of the ':' which terminates the footer line key if this
* is otherwise a valid footer line key; otherwise -1.
*/
- public static int endOfFooterLineKey(final byte[] raw, int ptr) {
+ public static int endOfFooterLineKey(byte[] raw, int ptr) {
try {
for (;;) {
final byte c = raw[ptr];
@@ -843,7 +1162,7 @@ public final class RawParseUtils {
* @return a string representation of the range <code>[start,end)</code>,
* after decoding the region through the specified character set.
*/
- public static String decode(final byte[] buffer) {
+ public static String decode(byte[] buffer) {
return decode(buffer, 0, buffer.length);
}
@@ -865,7 +1184,7 @@ public final class RawParseUtils {
*/
public static String decode(final byte[] buffer, final int start,
final int end) {
- return decode(Constants.CHARSET, buffer, start, end);
+ return decode(UTF_8, buffer, start, end);
}
/**
@@ -881,7 +1200,7 @@ public final class RawParseUtils {
* @return a string representation of the range <code>[start,end)</code>,
* after decoding the region through the specified character set.
*/
- public static String decode(final Charset cs, final byte[] buffer) {
+ public static String decode(Charset cs, byte[] buffer) {
return decode(cs, buffer, 0, buffer.length);
}
@@ -933,29 +1252,27 @@ public final class RawParseUtils {
* data from.
* @return a string representation of the range <code>[start,end)</code>,
* after decoding the region through the specified character set.
- * @throws CharacterCodingException
+ * @throws java.nio.charset.CharacterCodingException
* the input is not in any of the tested character sets.
*/
public static String decodeNoFallback(final Charset cs,
final byte[] buffer, final int start, final int end)
throws CharacterCodingException {
- final ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start);
+ ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start);
b.mark();
// Try our built-in favorite. The assumption here is that
// decoding will fail if the data is not actually encoded
// using that encoder.
- //
try {
- return decode(b, Constants.CHARSET);
+ return decode(b, UTF_8);
} catch (CharacterCodingException e) {
b.reset();
}
- if (!cs.equals(Constants.CHARSET)) {
+ if (!cs.equals(UTF_8)) {
// Try the suggested encoding, it might be right since it was
// provided by the caller.
- //
try {
return decode(b, cs);
} catch (CharacterCodingException e) {
@@ -965,9 +1282,8 @@ public final class RawParseUtils {
// Try the default character set. A small group of people
// might actually use the same (or very similar) locale.
- //
- final Charset defcs = Charset.defaultCharset();
- if (!defcs.equals(cs) && !defcs.equals(Constants.CHARSET)) {
+ Charset defcs = SystemReader.getInstance().getDefaultCharset();
+ if (!defcs.equals(cs) && !defcs.equals(UTF_8)) {
try {
return decode(b, defcs);
} catch (CharacterCodingException e) {
@@ -1001,7 +1317,7 @@ public final class RawParseUtils {
return r.toString();
}
- private static String decode(final ByteBuffer b, final Charset charset)
+ private static String decode(ByteBuffer b, Charset charset)
throws CharacterCodingException {
final CharsetDecoder d = charset.newDecoder();
d.onMalformedInput(CodingErrorAction.REPORT);
@@ -1020,7 +1336,7 @@ public final class RawParseUtils {
* commit buffer.
* @return position of the user's message buffer.
*/
- public static final int commitMessage(final byte[] b, int ptr) {
+ public static final int commitMessage(byte[] b, int ptr) {
final int sz = b.length;
if (ptr == 0)
ptr += 46; // skip the "tree ..." line.
@@ -1044,10 +1360,11 @@ public final class RawParseUtils {
* buffer.
* @return position of the user's message buffer.
*/
- public static final int tagMessage(final byte[] b, int ptr) {
+ public static final int tagMessage(byte[] b, int ptr) {
final int sz = b.length;
if (ptr == 0)
ptr += 48; // skip the "object ..." line.
+ // Assume the rest of the current paragraph is all headers.
while (ptr < sz && b[ptr] != '\n')
ptr = nextLF(b, ptr);
if (ptr < sz && b[ptr] == '\n')
@@ -1069,7 +1386,7 @@ public final class RawParseUtils {
* @return position of the LF at the end of the paragraph;
* <code>b.length</code> if no paragraph end could be located.
*/
- public static final int endOfParagraph(final byte[] b, final int start) {
+ public static final int endOfParagraph(byte[] b, int start) {
int ptr = start;
final int sz = b.length;
while (ptr < sz && (b[ptr] != '\n' && b[ptr] != '\r'))
@@ -1081,7 +1398,19 @@ public final class RawParseUtils {
return ptr;
}
- private static int lastIndexOfTrim(byte[] raw, char ch, int pos) {
+ /**
+ * Get last index of {@code ch} in raw, trimming spaces.
+ *
+ * @param raw
+ * buffer to scan.
+ * @param ch
+ * character to find.
+ * @param pos
+ * starting position.
+ * @return last index of {@code ch} in raw, trimming spaces.
+ * @since 4.1
+ */
+ public static int lastIndexOfTrim(byte[] raw, char ch, int pos) {
while (pos >= 0 && raw[pos] == ' ')
pos--;