diff options
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java | 573 |
1 files changed, 451 insertions, 122 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java index 3c2460cad7..3ed72516c7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java @@ -1,53 +1,29 @@ /* * Copyright (C) 2008-2009, Google Inc. - * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org> - * and other copyright owners as documented in the project's IP log. + * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org> and others * - * This program and the accompanying materials are made available - * under the terms of the Eclipse Distribution License v1.0 which - * accompanies this distribution, is reproduced below, and is - * available at http://www.eclipse.org/org/documents/edl-v10.php + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * - Neither the name of the Eclipse Foundation, Inc. nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.util; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.time.Instant.EPOCH; +import static java.time.ZoneOffset.UTC; import static org.eclipse.jgit.lib.ObjectChecker.author; import static org.eclipse.jgit.lib.ObjectChecker.committer; import static org.eclipse.jgit.lib.ObjectChecker.encoding; +import static org.eclipse.jgit.lib.ObjectChecker.object; +import static org.eclipse.jgit.lib.ObjectChecker.parent; +import static org.eclipse.jgit.lib.ObjectChecker.tag; import static org.eclipse.jgit.lib.ObjectChecker.tagger; +import static org.eclipse.jgit.lib.ObjectChecker.tree; +import static org.eclipse.jgit.lib.ObjectChecker.type; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; @@ -56,21 +32,24 @@ import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.UnsupportedCharsetException; +import java.time.DateTimeException; +import java.time.Instant; +import java.time.ZoneId; +import java.time.ZoneOffset; import java.util.Arrays; import java.util.HashMap; import java.util.Map; +import org.eclipse.jgit.annotations.Nullable; +import org.eclipse.jgit.diff.RawText; +import org.eclipse.jgit.errors.BinaryBlobException; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.PersonIdent; -/** Handy utility functions to parse raw object contents. */ +/** + * Handy utility functions to parse raw object contents. + */ public final class RawParseUtils { - /** - * UTF-8 charset constant. - * - * @since 2.2 - */ - public static final Charset UTF8_CHARSET = Charset.forName("UTF-8"); //$NON-NLS-1$ private static final byte[] digits10; @@ -81,8 +60,9 @@ public final class RawParseUtils { private static final Map<String, Charset> encodingAliases; static { - encodingAliases = new HashMap<String, Charset>(); - encodingAliases.put("latin-1", Charset.forName("ISO-8859-1")); //$NON-NLS-1$ //$NON-NLS-2$ + encodingAliases = new HashMap<>(); + encodingAliases.put("latin-1", ISO_8859_1); //$NON-NLS-1$ + encodingAliases.put("iso-latin-1", ISO_8859_1); //$NON-NLS-1$ digits10 = new byte['9' + 1]; Arrays.fill(digits10, (byte) -1); @@ -119,7 +99,7 @@ public final class RawParseUtils { * the buffer to test for equality with b. * @return ptr + src.length if b[ptr..src.length] == src; else -1. */ - public static final int match(final byte[] b, int ptr, final byte[] src) { + public static final int match(byte[] b, int ptr, byte[] src) { if (ptr + src.length > b.length) return -1; for (int i = 0; i < src.length; i++, ptr++) @@ -302,7 +282,7 @@ public final class RawParseUtils { * @param p * first position within the buffer to parse. * @return the integer value. - * @throws ArrayIndexOutOfBoundsException + * @throws java.lang.ArrayIndexOutOfBoundsException * if the string is not hex formatted. */ public static final int parseHexInt16(final byte[] bs, final int p) { @@ -332,7 +312,7 @@ public final class RawParseUtils { * @param p * first position within the buffer to parse. * @return the integer value. - * @throws ArrayIndexOutOfBoundsException + * @throws java.lang.ArrayIndexOutOfBoundsException * if the string is not hex formatted. */ public static final int parseHexInt32(final byte[] bs, final int p) { @@ -362,12 +342,79 @@ public final class RawParseUtils { } /** + * Parse 16 character base 16 (hex) formatted string to unsigned long. + * <p> + * The number is read in network byte order, that is, most significant + * nibble first. + * + * @param bs + * buffer to parse digits from; positions {@code [p, p+16)} will + * be parsed. + * @param p + * first position within the buffer to parse. + * @return the integer value. + * @throws java.lang.ArrayIndexOutOfBoundsException + * if the string is not hex formatted. + * @since 4.3 + */ + @SuppressWarnings("IntLongMath") + public static final long parseHexInt64(final byte[] bs, final int p) { + long r = digits16[bs[p]] << 4; + + r |= digits16[bs[p + 1]]; + r <<= 4; + + r |= digits16[bs[p + 2]]; + r <<= 4; + + r |= digits16[bs[p + 3]]; + r <<= 4; + + r |= digits16[bs[p + 4]]; + r <<= 4; + + r |= digits16[bs[p + 5]]; + r <<= 4; + + r |= digits16[bs[p + 6]]; + r <<= 4; + + r |= digits16[bs[p + 7]]; + r <<= 4; + + r |= digits16[bs[p + 8]]; + r <<= 4; + + r |= digits16[bs[p + 9]]; + r <<= 4; + + r |= digits16[bs[p + 10]]; + r <<= 4; + + r |= digits16[bs[p + 11]]; + r <<= 4; + + r |= digits16[bs[p + 12]]; + r <<= 4; + + r |= digits16[bs[p + 13]]; + r <<= 4; + + r |= digits16[bs[p + 14]]; + + final int last = digits16[bs[p + 15]]; + if (r < 0 || last < 0) + throw new ArrayIndexOutOfBoundsException(); + return (r << 4) | last; + } + + /** * Parse a single hex digit to its numeric value (0-15). * * @param digit * hex character to parse. * @return numeric value, in the range 0-15. - * @throws ArrayIndexOutOfBoundsException + * @throws java.lang.ArrayIndexOutOfBoundsException * if the input digit is not a valid hex digit. */ public static final int parseHexInt4(final byte digit) { @@ -389,14 +436,58 @@ public final class RawParseUtils { * position within buffer to start parsing digits at. * @return the timezone at this location, expressed in minutes. */ - public static final int parseTimeZoneOffset(final byte[] b, int ptr) { - final int v = parseBase10(b, ptr, null); + public static final int parseTimeZoneOffset(byte[] b, int ptr) { + return parseTimeZoneOffset(b, ptr, null); + } + + /** + * Parse a Git style timezone string. + * <p> + * The sequence "-0315" will be parsed as the numeric value -195, as the + * lower two positions count minutes, not 100ths of an hour. + * + * @param b + * buffer to scan. + * @param ptr + * position within buffer to start parsing digits at. + * @param ptrResult + * optional location to return the new ptr value through. If null + * the ptr value will be discarded. + * @return the timezone at this location, expressed in minutes. + * @since 4.1 + */ + public static final int parseTimeZoneOffset(final byte[] b, int ptr, + MutableInteger ptrResult) { + final int v = parseBase10(b, ptr, ptrResult); final int tzMins = v % 100; final int tzHours = v / 100; return tzHours * 60 + tzMins; } /** + * Parse a Git style timezone string in [+-]hhmm format + * + * @param b + * buffer to scan. + * @param ptr + * position within buffer to start parsing digits at. + * @param ptrResult + * optional location to return the new ptr value through. If null + * the ptr value will be discarded. + * @return the ZoneOffset represention of the timezone offset string. + * Invalid offsets default to UTC. + */ + private static ZoneId parseZoneOffset(final byte[] b, int ptr, + MutableInteger ptrResult) { + int hhmm = parseBase10(b, ptr, ptrResult); + try { + return ZoneOffset.ofHoursMinutes(hhmm / 100, hhmm % 100); + } catch (DateTimeException e) { + return UTC; + } + } + + /** * Locate the first position after a given character. * * @param b @@ -407,7 +498,7 @@ public final class RawParseUtils { * character to find. * @return new position just after chrA. */ - public static final int next(final byte[] b, int ptr, final char chrA) { + public static final int next(byte[] b, int ptr, char chrA) { final int sz = b.length; while (ptr < sz) { if (b[ptr++] == chrA) @@ -427,7 +518,7 @@ public final class RawParseUtils { * position within buffer to start looking for LF at. * @return new position just after the first LF found. */ - public static final int nextLF(final byte[] b, int ptr) { + public static final int nextLF(byte[] b, int ptr) { return next(b, ptr, '\n'); } @@ -444,7 +535,7 @@ public final class RawParseUtils { * character to find. * @return new position just after the first chrA or LF to be found. */ - public static final int nextLF(final byte[] b, int ptr, final char chrA) { + public static final int nextLF(byte[] b, int ptr, char chrA) { final int sz = b.length; while (ptr < sz) { final byte c = b[ptr++]; @@ -455,6 +546,140 @@ public final class RawParseUtils { } /** + * Locate the first end of line after the given position, while treating + * following lines which are starting with spaces as part of the current + * line. + * <p> + * For example, {@code nextLfSkippingSplitLines( + * "row \n with space at beginning of a following line\nThe actual next line", + * 0)} will return the position of {@code "\nThe actual next line"}. + * + * @param b + * buffer to scan. + * @param ptr + * position within buffer to start looking for the next line. + * @return new position just after the line end of the last line-split. This + * is either b.length, or the index of the current split-line's + * terminating newline. + * @since 6.9 + */ + public static final int nextLfSkippingSplitLines(final byte[] b, int ptr) { + final int sz = b.length; + while (ptr < sz) { + final byte c = b[ptr++]; + if (c == '\n' && (ptr == sz || b[ptr] != ' ')) { + return ptr - 1; + } + } + return ptr; + } + + /** + * Extract a part of a buffer as a header value, removing the single blanks + * at the front of continuation lines. + * + * @param b + * buffer to extract the header from + * @param start + * of the header value, see + * {@link #headerStart(byte[], byte[], int)} + * @param end + * of the header; see + * {@link #nextLfSkippingSplitLines(byte[], int)} + * @return the header value, with blanks indicating continuation lines + * stripped + * @since 6.9 + */ + public static final byte[] headerValue(final byte[] b, int start, int end) { + byte[] data = new byte[end - start]; + int out = 0; + byte last = '\0'; + for (int in = start; in < end; in++) { + byte ch = b[in]; + if (ch != ' ' || last != '\n') { + data[out++] = ch; + } + last = ch; + } + if (out == data.length) { + return data; + } + return Arrays.copyOf(data, out); + } + + /** + * Locate the first end of header after the given position. Note that + * headers may be more than one line long. + * <p> + * Also note that there might be multiple headers. If you wish to find the + * last header's end - call this in a loop. + * + * @param b + * buffer to scan. + * @param ptr + * position within buffer to start looking for the header + * (normally a new-line). + * @return new position just after the line end. This is either b.length, or + * the index of the header's terminating newline. + * @since 5.1 + * @deprecated use {{@link #nextLfSkippingSplitLines}} directly instead + */ + @Deprecated + public static final int headerEnd(final byte[] b, int ptr) { + return nextLfSkippingSplitLines(b, ptr); + } + + /** + * Find the start of the contents of a given header. + * + * @param b + * buffer to scan. + * @param headerName + * header to search for + * @param ptr + * position within buffer to start looking for header at. + * @return new position at the start of the header's contents, -1 for + * not found + * @since 5.1 + */ + public static final int headerStart(byte[] headerName, byte[] b, int ptr) { + // Start by advancing to just past a LF or buffer start + if (ptr != 0) { + ptr = nextLF(b, ptr - 1); + } + while (ptr < b.length - (headerName.length + 1)) { + boolean found = true; + for (byte element : headerName) { + if (element != b[ptr++]) { + found = false; + break; + } + } + if (found && b[ptr++] == ' ') { + return ptr; + } + ptr = nextLF(b, ptr); + } + return -1; + } + + /** + * Returns whether the message starts with any known headers. + * + * @param b + * buffer to scan. + * @return whether the message starts with any known headers + * @since 6.9 + */ + public static final boolean hasAnyKnownHeaders(byte[] b) { + return match(b, 0, tree) != -1 || match(b, 0, parent) != -1 + || match(b, 0, author) != -1 || match(b, 0, committer) != -1 + || match(b, 0, encoding) != -1 || match(b, 0, object) != -1 + || match(b, 0, type) != -1 || match(b, 0, tag) != -1 + || match(b, 0, tagger) != -1; + } + + /** * Locate the first position before a given character. * * @param b @@ -465,7 +690,7 @@ public final class RawParseUtils { * character to find. * @return new position just before chrA, -1 for not found */ - public static final int prev(final byte[] b, int ptr, final char chrA) { + public static final int prev(byte[] b, int ptr, char chrA) { if (ptr == b.length) --ptr; while (ptr >= 0) { @@ -486,7 +711,7 @@ public final class RawParseUtils { * position within buffer to start looking for LF at. * @return new position just before the first LF found, -1 for not found */ - public static final int prevLF(final byte[] b, int ptr) { + public static final int prevLF(byte[] b, int ptr) { return prev(b, ptr, '\n'); } @@ -504,7 +729,7 @@ public final class RawParseUtils { * @return new position just before the first chrA or LF to be found, -1 for * not found */ - public static final int prevLF(final byte[] b, int ptr, final char chrA) { + public static final int prevLF(byte[] b, int ptr, char chrA) { if (ptr == b.length) --ptr; while (ptr >= 0) { @@ -519,7 +744,7 @@ public final class RawParseUtils { * Index the region between <code>[ptr, end)</code> to find line starts. * <p> * The returned list is 1 indexed. Index 0 contains - * {@link Integer#MIN_VALUE} to pad the list out. + * {@link java.lang.Integer#MIN_VALUE} to pad the list out. * <p> * Using a 1 indexed list means that line numbers can be directly accessed * from the list, so <code>list.get(1)</code> (aka get line 1) returns @@ -535,17 +760,56 @@ public final class RawParseUtils { * line 1. * @param end * 1 past the end of the content within <code>buf</code>. - * @return a line map indexing the start position of each line. + * @return a line map indicating the starting position of each line. */ - public static final IntList lineMap(final byte[] buf, int ptr, int end) { + public static final IntList lineMap(byte[] buf, int ptr, int end) { + IntList map = new IntList((end - ptr) / 36); + map.fillTo(1, Integer.MIN_VALUE); + for (; ptr < end; ptr = nextLF(buf, ptr)) { + map.add(ptr); + } + map.add(end); + return map; + } + + /** + * Like {@link #lineMap(byte[], int, int)} but throw + * {@link BinaryBlobException} if a NUL byte is encountered. + * + * @param buf + * buffer to scan. + * @param ptr + * position within the buffer corresponding to the first byte of + * line 1. + * @param end + * 1 past the end of the content within <code>buf</code>. + * @return a line map indicating the starting position of each line. + * @throws BinaryBlobException + * if a NUL byte or a lone CR is found. + * @since 5.0 + */ + public static final IntList lineMapOrBinary(byte[] buf, int ptr, int end) + throws BinaryBlobException { // Experimentally derived from multiple source repositories // the average number of bytes/line is 36. Its a rough guess // to initially size our map close to the target. - // - final IntList map = new IntList((end - ptr) / 36); - map.fillTo(1, Integer.MIN_VALUE); - for (; ptr < end; ptr = nextLF(buf, ptr)) - map.add(ptr); + IntList map = new IntList((end - ptr) / 36); + map.add(Integer.MIN_VALUE); + byte last = '\n'; // Must be \n to add the initial ptr + for (; ptr < end; ptr++) { + if (last == '\n') { + map.add(ptr); + } + byte curr = buf[ptr]; + if (RawText.isBinary(curr, last)) { + throw new BinaryBlobException(); + } + last = curr; + } + if (last == '\r') { + // Counts as binary + throw new BinaryBlobException(); + } map.add(end); return map; } @@ -563,7 +827,7 @@ public final class RawParseUtils { * character of the author's name. If no author header can be * located -1 is returned. */ - public static final int author(final byte[] b, int ptr) { + public static final int author(byte[] b, int ptr) { final int sz = b.length; if (ptr == 0) ptr += 46; // skip the "tree ..." line. @@ -585,7 +849,7 @@ public final class RawParseUtils { * character of the committer's name. If no committer header can be * located -1 is returned. */ - public static final int committer(final byte[] b, int ptr) { + public static final int committer(byte[] b, int ptr) { final int sz = b.length; if (ptr == 0) ptr += 46; // skip the "tree ..." line. @@ -609,7 +873,7 @@ public final class RawParseUtils { * character of the tagger's name. If no tagger header can be * located -1 is returned. */ - public static final int tagger(final byte[] b, int ptr) { + public static final int tagger(byte[] b, int ptr) { final int sz = b.length; if (ptr == 0) ptr += 48; // skip the "object ..." line. @@ -637,7 +901,7 @@ public final class RawParseUtils { * character of the encoding's name. If no encoding header can be * located -1 is returned (and UTF-8 should be assumed). */ - public static final int encoding(final byte[] b, int ptr) { + public static final int encoding(byte[] b, int ptr) { final int sz = b.length; while (ptr < sz) { if (b[ptr] == '\n') @@ -650,40 +914,85 @@ public final class RawParseUtils { } /** + * Parse the "encoding " header as a string. + * <p> + * Locates the "encoding " header (if present) and returns its value. + * + * @param b + * buffer to scan. + * @return the encoding header as specified in the commit; null if the + * header was not present and should be assumed. + * @since 4.2 + */ + @Nullable + public static String parseEncodingName(byte[] b) { + int enc = encoding(b, 0); + if (enc < 0) { + return null; + } + int lf = nextLF(b, enc); + return decode(UTF_8, b, enc, lf - 1); + } + + /** * Parse the "encoding " header into a character set reference. * <p> * Locates the "encoding " header (if present) by first calling * {@link #encoding(byte[], int)} and then returns the proper character set * to apply to this buffer to evaluate its contents as character data. * <p> - * If no encoding header is present, {@link Constants#CHARSET} is assumed. + * If no encoding header is present {@code UTF-8} is assumed. * * @param b * buffer to scan. * @return the Java character set representation. Never null. + * @throws IllegalCharsetNameException + * if the character set requested by the encoding header is + * malformed and unsupportable. + * @throws UnsupportedCharsetException + * if the JRE does not support the character set requested by + * the encoding header. */ - public static Charset parseEncoding(final byte[] b) { - final int enc = encoding(b, 0); - if (enc < 0) - return Constants.CHARSET; - final int lf = nextLF(b, enc); - String decoded = decode(Constants.CHARSET, b, enc, lf - 1); + public static Charset parseEncoding(byte[] b) { + String enc = parseEncodingName(b); + if (enc == null) { + return UTF_8; + } + + String name = enc.trim(); try { - return Charset.forName(decoded); - } catch (IllegalCharsetNameException badName) { - Charset aliased = charsetForAlias(decoded); - if (aliased != null) - return aliased; - throw badName; - } catch (UnsupportedCharsetException badName) { - Charset aliased = charsetForAlias(decoded); - if (aliased != null) + return Charset.forName(name); + } catch (IllegalCharsetNameException + | UnsupportedCharsetException badName) { + Charset aliased = charsetForAlias(name); + if (aliased != null) { return aliased; + } throw badName; } } /** + * Parse the "encoding " header into a character set reference. + * <p> + * If unsuccessful, return UTF-8. + * + * @param buffer + * buffer to scan. + * @return the Java character set representation. Never null. Default to + * UTF-8. + * @see #parseEncoding(byte[]) + * @since 6.7 + */ + public static Charset guessEncoding(byte[] buffer) { + try { + return parseEncoding(buffer); + } catch (IllegalCharsetNameException | UnsupportedCharsetException e) { + return UTF_8; + } + } + + /** * Parse a name string (e.g. author, committer, tagger) into a PersonIdent. * <p> * Leading spaces won't be trimmed from the string, i.e. will show up in the @@ -694,7 +1003,7 @@ public final class RawParseUtils { * @return the parsed identity or null in case the identity could not be * parsed. */ - public static PersonIdent parsePersonIdent(final String in) { + public static PersonIdent parsePersonIdent(String in) { return parsePersonIdent(Constants.encode(in), 0); } @@ -716,8 +1025,16 @@ public final class RawParseUtils { * @return the parsed identity or null in case the identity could not be * parsed. */ - public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) { - final Charset cs = parseEncoding(raw); + public static PersonIdent parsePersonIdent(byte[] raw, int nameB) { + Charset cs; + try { + cs = parseEncoding(raw); + } catch (IllegalCharsetNameException | UnsupportedCharsetException e) { + // Assume UTF-8 for person identities, usually this is correct. + // If not decode() will fall back to the ISO-8859-1 encoding. + cs = UTF_8; + } + final int emailB = nextLF(raw, nameB, '<'); final int emailE = nextLF(raw, emailB, '>'); if (emailB >= raw.length || raw[emailB] == '\n' || @@ -739,17 +1056,19 @@ public final class RawParseUtils { // character if there is no trailing LF. final int tzBegin = lastIndexOfTrim(raw, ' ', nextLF(raw, emailE - 1) - 2) + 1; - if (tzBegin <= emailE) // No time/zone, still valid - return new PersonIdent(name, email, 0, 0); + if (tzBegin <= emailE) { // No time/zone, still valid + return new PersonIdent(name, email, EPOCH, UTC); + } final int whenBegin = Math.max(emailE, lastIndexOfTrim(raw, ' ', tzBegin - 1) + 1); - if (whenBegin >= tzBegin - 1) // No time/zone, still valid - return new PersonIdent(name, email, 0, 0); + if (whenBegin >= tzBegin - 1) { // No time/zone, still valid + return new PersonIdent(name, email, EPOCH, UTC); + } - final long when = parseLongBase10(raw, whenBegin, null); - final int tz = parseTimeZoneOffset(raw, tzBegin); - return new PersonIdent(name, email, when * 1000L, tz); + long when = parseLongBase10(raw, whenBegin, null); + return new PersonIdent(name, email, Instant.ofEpochSecond(when), + parseZoneOffset(raw, tzBegin, null)); } /** @@ -787,16 +1106,16 @@ public final class RawParseUtils { name = decode(raw, nameB, stop); final MutableInteger ptrout = new MutableInteger(); - long when; - int tz; + Instant when; + ZoneId tz; if (emailE < stop) { - when = parseLongBase10(raw, emailE + 1, ptrout); - tz = parseTimeZoneOffset(raw, ptrout.value); + when = Instant.ofEpochSecond(parseLongBase10(raw, emailE + 1, ptrout)); + tz = parseZoneOffset(raw, ptrout.value, null); } else { - when = 0; - tz = 0; + when = EPOCH; + tz = UTC; } - return new PersonIdent(name, email, when * 1000L, tz); + return new PersonIdent(name, email, when, tz); } /** @@ -816,7 +1135,7 @@ public final class RawParseUtils { * @return position of the ':' which terminates the footer line key if this * is otherwise a valid footer line key; otherwise -1. */ - public static int endOfFooterLineKey(final byte[] raw, int ptr) { + public static int endOfFooterLineKey(byte[] raw, int ptr) { try { for (;;) { final byte c = raw[ptr]; @@ -843,7 +1162,7 @@ public final class RawParseUtils { * @return a string representation of the range <code>[start,end)</code>, * after decoding the region through the specified character set. */ - public static String decode(final byte[] buffer) { + public static String decode(byte[] buffer) { return decode(buffer, 0, buffer.length); } @@ -865,7 +1184,7 @@ public final class RawParseUtils { */ public static String decode(final byte[] buffer, final int start, final int end) { - return decode(Constants.CHARSET, buffer, start, end); + return decode(UTF_8, buffer, start, end); } /** @@ -881,7 +1200,7 @@ public final class RawParseUtils { * @return a string representation of the range <code>[start,end)</code>, * after decoding the region through the specified character set. */ - public static String decode(final Charset cs, final byte[] buffer) { + public static String decode(Charset cs, byte[] buffer) { return decode(cs, buffer, 0, buffer.length); } @@ -933,29 +1252,27 @@ public final class RawParseUtils { * data from. * @return a string representation of the range <code>[start,end)</code>, * after decoding the region through the specified character set. - * @throws CharacterCodingException + * @throws java.nio.charset.CharacterCodingException * the input is not in any of the tested character sets. */ public static String decodeNoFallback(final Charset cs, final byte[] buffer, final int start, final int end) throws CharacterCodingException { - final ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start); + ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start); b.mark(); // Try our built-in favorite. The assumption here is that // decoding will fail if the data is not actually encoded // using that encoder. - // try { - return decode(b, Constants.CHARSET); + return decode(b, UTF_8); } catch (CharacterCodingException e) { b.reset(); } - if (!cs.equals(Constants.CHARSET)) { + if (!cs.equals(UTF_8)) { // Try the suggested encoding, it might be right since it was // provided by the caller. - // try { return decode(b, cs); } catch (CharacterCodingException e) { @@ -965,9 +1282,8 @@ public final class RawParseUtils { // Try the default character set. A small group of people // might actually use the same (or very similar) locale. - // - final Charset defcs = Charset.defaultCharset(); - if (!defcs.equals(cs) && !defcs.equals(Constants.CHARSET)) { + Charset defcs = SystemReader.getInstance().getDefaultCharset(); + if (!defcs.equals(cs) && !defcs.equals(UTF_8)) { try { return decode(b, defcs); } catch (CharacterCodingException e) { @@ -1001,7 +1317,7 @@ public final class RawParseUtils { return r.toString(); } - private static String decode(final ByteBuffer b, final Charset charset) + private static String decode(ByteBuffer b, Charset charset) throws CharacterCodingException { final CharsetDecoder d = charset.newDecoder(); d.onMalformedInput(CodingErrorAction.REPORT); @@ -1020,7 +1336,7 @@ public final class RawParseUtils { * commit buffer. * @return position of the user's message buffer. */ - public static final int commitMessage(final byte[] b, int ptr) { + public static final int commitMessage(byte[] b, int ptr) { final int sz = b.length; if (ptr == 0) ptr += 46; // skip the "tree ..." line. @@ -1044,10 +1360,11 @@ public final class RawParseUtils { * buffer. * @return position of the user's message buffer. */ - public static final int tagMessage(final byte[] b, int ptr) { + public static final int tagMessage(byte[] b, int ptr) { final int sz = b.length; if (ptr == 0) ptr += 48; // skip the "object ..." line. + // Assume the rest of the current paragraph is all headers. while (ptr < sz && b[ptr] != '\n') ptr = nextLF(b, ptr); if (ptr < sz && b[ptr] == '\n') @@ -1069,7 +1386,7 @@ public final class RawParseUtils { * @return position of the LF at the end of the paragraph; * <code>b.length</code> if no paragraph end could be located. */ - public static final int endOfParagraph(final byte[] b, final int start) { + public static final int endOfParagraph(byte[] b, int start) { int ptr = start; final int sz = b.length; while (ptr < sz && (b[ptr] != '\n' && b[ptr] != '\r')) @@ -1081,7 +1398,19 @@ public final class RawParseUtils { return ptr; } - private static int lastIndexOfTrim(byte[] raw, char ch, int pos) { + /** + * Get last index of {@code ch} in raw, trimming spaces. + * + * @param raw + * buffer to scan. + * @param ch + * character to find. + * @param pos + * starting position. + * @return last index of {@code ch} in raw, trimming spaces. + * @since 4.1 + */ + public static int lastIndexOfTrim(byte[] raw, char ch, int pos) { while (pos >= 0 && raw[pos] == ' ') pos--; |