mirrors
/
jgit
mirror of https://github.com/eclipse/jgit.git


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
0170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016
							/*
 * Copyright (C) 2008-2009, Google Inc.
 * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org>
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.util;

import static org.eclipse.jgit.lib.ObjectChecker.author;
import static org.eclipse.jgit.lib.ObjectChecker.committer;
import static org.eclipse.jgit.lib.ObjectChecker.encoding;
import static org.eclipse.jgit.lib.ObjectChecker.tagger;

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;

import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.PersonIdent;

/** Handy utility functions to parse raw object contents. */
public final class RawParseUtils {
	/**
	 * Decimal digit lookup, indexed by ASCII code up to and including '9'.
	 * Holds the digit value for '0'..'9' and -1 everywhere else.
	 */
	private static final byte[] digits10;

	/**
	 * Hexadecimal digit lookup, indexed by ASCII code up to and including
	 * 'f'. Holds 0-15 for '0'..'9', 'a'..'f' and 'A'..'F'; -1 elsewhere.
	 */
	private static final byte[] digits16;

	/**
	 * Footer key character set, indexed by ASCII code up to and including
	 * 'z'. Holds 1 for '-', digits and ASCII letters; 0 elsewhere.
	 */
	private static final byte[] footerLineKeyChars;

	static {
		final byte[] dec = new byte['9' + 1];
		Arrays.fill(dec, (byte) -1);
		for (int n = 0; n < 10; n++)
			dec['0' + n] = (byte) n;
		digits10 = dec;

		final byte[] hex = new byte['f' + 1];
		Arrays.fill(hex, (byte) -1);
		for (int n = 0; n < 10; n++)
			hex['0' + n] = (byte) n;
		for (int n = 0; n < 6; n++) {
			// Both letter cases map onto the same value, 10..15.
			hex['a' + n] = (byte) (10 + n);
			hex['A' + n] = (byte) (10 + n);
		}
		digits16 = hex;

		final byte[] key = new byte['z' + 1];
		key['-'] = 1;
		for (int c = '0'; c <= '9'; c++)
			key[c] = 1;
		for (int c = 'A'; c <= 'Z'; c++)
			key[c] = 1;
		for (int c = 'a'; c <= 'z'; c++)
			key[c] = 1;
		footerLineKeyChars = key;
	}

	/**
	 * Test whether the bytes of {@code src} appear in {@code b} starting at
	 * {@code ptr}.
	 *
	 * @param b
	 *            the buffer to scan.
	 * @param ptr
	 *            position within b that should line up with src[0].
	 * @param src
	 *            the byte sequence expected at that position.
	 * @return {@code ptr + src.length} when every byte of src matches; -1 if
	 *         src would run past the end of b or any byte differs.
	 */
	public static final int match(final byte[] b, int ptr, final byte[] src) {
		final int need = src.length;
		if (b.length < ptr + need)
			return -1;
		int i = 0;
		while (i < need) {
			if (src[i++] != b[ptr++])
				return -1;
		}
		return ptr;
	}

	/** ASCII characters for the decimal digits, indexed by digit value 0-9. */
	private static final byte[] base10byte = { '0', '1', '2', '3', '4', '5',
			'6', '7', '8', '9' };

	/**
	 * Format a base 10 numeric into a temporary buffer.
	 * <p>
	 * Formatting is performed backwards. The least significant digit is
	 * stored at offset <code>o-1</code> and the first character (the most
	 * significant digit, or the '-' sign for a negative value) ends up at
	 * the returned offset.
	 * <p>
	 * Negative values, including {@link Integer#MIN_VALUE}, are written with
	 * a leading '-'.
	 * <p>
	 * The argument and return values from this method make it easy to chain
	 * writing, for example:
	 * </p>
	 *
	 * <pre>
	 * final byte[] tmp = new byte[64];
	 * int ptr = tmp.length;
	 * tmp[--ptr] = '\n';
	 * ptr = RawParseUtils.formatBase10(tmp, ptr, 32);
	 * tmp[--ptr] = ' ';
	 * ptr = RawParseUtils.formatBase10(tmp, ptr, 18);
	 * tmp[--ptr] = 0;
	 * final String str = new String(tmp, ptr, tmp.length - ptr);
	 * </pre>
	 *
	 * @param b
	 *            buffer to write into.
	 * @param o
	 *            one offset past the location where writing will begin; writing
	 *            proceeds towards lower index values.
	 * @param value
	 *            the value to store.
	 * @return the new offset value <code>o</code>. This is the position of
	 *         the last byte written. Additional writing should start at one
	 *         position earlier.
	 * @throws ArrayIndexOutOfBoundsException
	 *             if there is not enough room in front of <code>o</code>.
	 */
	public static int formatBase10(final byte[] b, int o, int value) {
		if (value == 0) {
			b[--o] = '0';
			return o;
		}
		final boolean isneg = value < 0;
		while (value != 0) {
			// The remainder carries the sign of value, so take the magnitude
			// of each digit. Negating value itself would overflow for
			// Integer.MIN_VALUE.
			b[--o] = base10byte[Math.abs(value % 10)];
			value /= 10;
		}
		if (isneg)
			b[--o] = '-';
		return o;
	}

	/**
	 * Read a decimal number, written as ASCII digits, into an int.
	 * <p>
	 * Leading spaces are skipped, then a single optional '+' or '-' is
	 * accepted, then digits are consumed until the first byte that is not a
	 * decimal digit or until the end of the buffer. No overflow detection is
	 * performed.
	 * <p>
	 * If only spaces remain in the buffer the method returns 0 immediately
	 * and does not update <code>ptrResult</code>.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position within buffer to start parsing digits at.
	 * @param ptrResult
	 *            optional location to return the new ptr value through. If null
	 *            the ptr value will be discarded.
	 * @return the parsed value; 0 if no digits were found at this location.
	 */
	public static final int parseBase10(final byte[] b, int ptr,
			final MutableInteger ptrResult) {
		int magnitude = 0;
		boolean negative = false;
		try {
			final int limit = b.length;
			for (; ptr < limit; ptr++) {
				if (b[ptr] != ' ')
					break;
			}
			if (limit <= ptr)
				return 0;

			final byte lead = b[ptr];
			if (lead == '-') {
				negative = true;
				ptr++;
			} else if (lead == '+') {
				ptr++;
			}

			for (; ptr < limit; ptr++) {
				final byte digit = digits10[b[ptr]];
				if (digit < 0)
					break;
				magnitude = (magnitude * 10) + digit;
			}
		} catch (ArrayIndexOutOfBoundsException e) {
			// The byte indexes outside the digit table, so it is not a
			// digit; stop parsing here.
		}
		if (ptrResult != null)
			ptrResult.value = ptr;
		return negative ? -magnitude : magnitude;
	}

	/**
	 * Read a decimal number, written as ASCII digits, into a long.
	 * <p>
	 * Leading spaces are skipped, then a single optional '+' or '-' is
	 * accepted, then digits are consumed until the first byte that is not a
	 * decimal digit or until the end of the buffer. No overflow detection is
	 * performed.
	 * <p>
	 * If only spaces remain in the buffer the method returns 0 immediately
	 * and does not update <code>ptrResult</code>.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position within buffer to start parsing digits at.
	 * @param ptrResult
	 *            optional location to return the new ptr value through. If null
	 *            the ptr value will be discarded.
	 * @return the parsed value; 0 if no digits were found at this location.
	 */
	public static final long parseLongBase10(final byte[] b, int ptr,
			final MutableInteger ptrResult) {
		long magnitude = 0;
		boolean negative = false;
		try {
			final int limit = b.length;
			for (; ptr < limit; ptr++) {
				if (b[ptr] != ' ')
					break;
			}
			if (limit <= ptr)
				return 0;

			final byte lead = b[ptr];
			if (lead == '-') {
				negative = true;
				ptr++;
			} else if (lead == '+') {
				ptr++;
			}

			for (; ptr < limit; ptr++) {
				final byte digit = digits10[b[ptr]];
				if (digit < 0)
					break;
				magnitude = (magnitude * 10) + digit;
			}
		} catch (ArrayIndexOutOfBoundsException e) {
			// The byte indexes outside the digit table, so it is not a
			// digit; stop parsing here.
		}
		if (ptrResult != null)
			ptrResult.value = ptr;
		return negative ? -magnitude : magnitude;
	}

	/**
	 * Convert 4 hexadecimal characters into an unsigned 16 bit value.
	 * <p>
	 * Characters are read most significant nybble first (network byte
	 * order).
	 *
	 * @param bs
	 *            buffer to parse digits from; positions {@code [p, p+4)} will
	 *            be parsed.
	 * @param p
	 *            first position within the buffer to parse.
	 * @return the integer value.
	 * @throws ArrayIndexOutOfBoundsException
	 *             if the string is not hex formatted.
	 */
	public static final int parseHexInt16(final byte[] bs, final int p) {
		int r = 0;
		for (int i = 0; i < 4; i++)
			r = (r << 4) | digits16[bs[p + i]];
		// An invalid digit contributes -1, which leaves the sign bit set.
		if (r < 0)
			throw new ArrayIndexOutOfBoundsException();
		return r;
	}

	/**
	 * Convert 8 hexadecimal characters into a 32 bit value.
	 * <p>
	 * Characters are read most significant nybble first (network byte
	 * order). The result is returned in an {@code int}, so inputs whose top
	 * bit is set come back as negative numbers.
	 *
	 * @param bs
	 *            buffer to parse digits from; positions {@code [p, p+8)} will
	 *            be parsed.
	 * @param p
	 *            first position within the buffer to parse.
	 * @return the integer value.
	 * @throws ArrayIndexOutOfBoundsException
	 *             if the string is not hex formatted.
	 */
	public static final int parseHexInt32(final byte[] bs, final int p) {
		// Accumulate the upper 7 nybbles first. With only 28 bits in use the
		// sign bit can only be set by an invalid digit (-1).
		int high = 0;
		for (int i = 0; i < 7; i++)
			high = (high << 4) | digits16[bs[p + i]];

		final int low = digits16[bs[p + 7]];
		if (high < 0 || low < 0)
			throw new ArrayIndexOutOfBoundsException();
		return (high << 4) | low;
	}

	/**
	 * Convert one hexadecimal character into its value.
	 *
	 * @param digit
	 *            hex character to parse.
	 * @return numeric value, in the range 0-15.
	 * @throws ArrayIndexOutOfBoundsException
	 *             if the input digit is not a valid hex digit.
	 */
	public static final int parseHexInt4(final byte digit) {
		final int value = digits16[digit];
		if (value >= 0)
			return value;
		throw new ArrayIndexOutOfBoundsException();
	}

	/**
	 * Parse a Git style timezone string into minutes.
	 * <p>
	 * The text is read as a signed decimal number of the form
	 * <code>[+-]HHMM</code>. The two low decimal positions are minutes and
	 * the remainder is hours, so "-0315" yields -195.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position within buffer to start parsing digits at.
	 * @return the timezone at this location, expressed in minutes.
	 */
	public static final int parseTimeZoneOffset(final byte[] b, int ptr) {
		final int hhmm = parseBase10(b, ptr, null);
		return (hhmm / 100) * 60 + (hhmm % 100);
	}

	/**
	 * Scan forward for a character and step over it.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position within buffer to start looking for chrA at.
	 * @param chrA
	 *            character to find.
	 * @return position just after the first chrA at or after ptr; if chrA
	 *         does not occur the scan stops at the end of the buffer and
	 *         <code>b.length</code> is returned (or ptr itself when ptr is
	 *         already past the end).
	 */
	public static final int next(final byte[] b, int ptr, final char chrA) {
		final int limit = b.length;
		for (; ptr < limit; ptr++) {
			if (b[ptr] == chrA)
				return ptr + 1;
		}
		return ptr;
	}

	/**
	 * Scan forward to the start of the next line.
	 * <p>
	 * The scan stops on the first '\n' found; see
	 * {@link #next(byte[], int, char)} for the result when none is present.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position within buffer to start looking for LF at.
	 * @return new position just after the first LF found.
	 */
	public static final int nextLF(final byte[] b, int ptr) {
		final char lf = '\n';
		return next(b, ptr, lf);
	}

	/**
	 * Scan forward for either a given character or LF and step over it.
	 * <p>
	 * Whichever of chrA or '\n' occurs first ends the scan.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position within buffer to start looking for chrA or LF at.
	 * @param chrA
	 *            character to find.
	 * @return new position just after the first chrA or LF to be found;
	 *         <code>b.length</code> if neither occurs (or ptr itself when
	 *         ptr is already past the end).
	 */
	public static final int nextLF(final byte[] b, int ptr, final char chrA) {
		final int limit = b.length;
		for (; ptr < limit; ptr++) {
			final byte cur = b[ptr];
			if (cur == '\n' || cur == chrA)
				return ptr + 1;
		}
		return ptr;
	}

	/**
	 * Scan backward for a character and step to the position before it.
	 * <p>
	 * A starting position equal to <code>b.length</code> is treated as the
	 * last byte of the buffer.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position within buffer to start looking for chrA at.
	 * @param chrA
	 *            character to find.
	 * @return new position just before chrA, -1 for not found. Note that -1
	 *         is also the result when chrA is found at index 0, and that a
	 *         negative ptr is returned unchanged.
	 */
	public static final int prev(final byte[] b, int ptr, final char chrA) {
		int pos = (ptr == b.length) ? ptr - 1 : ptr;
		for (; pos >= 0; pos--) {
			if (b[pos] == chrA)
				return pos - 1;
		}
		return pos;
	}

	/**
	 * Scan backward to the position before the previous LF.
	 * <p>
	 * The scan stops on the first '\n' found; see
	 * {@link #prev(byte[], int, char)} for the exact result rules.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position within buffer to start looking for LF at.
	 * @return new position just before the first LF found, -1 for not found
	 */
	public static final int prevLF(final byte[] b, int ptr) {
		final char lf = '\n';
		return prev(b, ptr, lf);
	}

	/**
	 * Scan backward for either a given character or LF and step to the
	 * position before it.
	 * <p>
	 * Whichever of chrA or '\n' is met first ends the scan. A starting
	 * position equal to <code>b.length</code> is treated as the last byte of
	 * the buffer.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position within buffer to start looking for chrA or LF at.
	 * @param chrA
	 *            character to find.
	 * @return new position just before the first chrA or LF to be found, -1 for
	 *         not found
	 */
	public static final int prevLF(final byte[] b, int ptr, final char chrA) {
		int pos = (ptr == b.length) ? ptr - 1 : ptr;
		for (; pos >= 0; pos--) {
			final byte cur = b[pos];
			if (cur == '\n' || cur == chrA)
				return pos - 1;
		}
		return pos;
	}

	/**
	 * Build an index of line start positions for the region
	 * <code>[ptr, end)</code>.
	 * <p>
	 * The returned list is 1 indexed: slot 0 is padding holding
	 * {@link Integer#MIN_VALUE}, so <code>list.get(1)</code> (line 1) is
	 * <code>ptr</code>, and so on for each following line.
	 * <p>
	 * The final element (index <code>map.size()-1</code>) is always
	 * <code>end</code>.
	 *
	 * @param buf
	 *            buffer to scan.
	 * @param ptr
	 *            position within the buffer corresponding to the first byte of
	 *            line 1.
	 * @param end
	 *            1 past the end of the content within <code>buf</code>.
	 * @return a line map indexing the start position of each line.
	 */
	public static final IntList lineMap(final byte[] buf, int ptr, int end) {
		// Initial capacity guess: roughly 36 bytes per line, a figure
		// derived experimentally from several source repositories.
		final IntList map = new IntList((end - ptr) / 36);
		map.fillTo(1, Integer.MIN_VALUE);
		int lineStart = ptr;
		while (lineStart < end) {
			map.add(lineStart);
			lineStart = nextLF(buf, lineStart);
		}
		map.add(end);
		return map;
	}

	/**
	 * Find where the "author " header's data begins in a commit buffer.
	 * <p>
	 * Headers are skipped by fixed width: 46 bytes for the "tree ..." line
	 * (only when the scan starts at 0) and 48 bytes for each line starting
	 * with 'p' (a parent).
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position in buffer to start the scan at. Most callers should
	 *            pass 0 to ensure the scan starts from the beginning of the
	 *            commit buffer and does not accidentally look at message body.
	 * @return position just after the space in "author ", so the first
	 *         character of the author's name. If no author header can be
	 *         located -1 is returned.
	 */
	public static final int author(final byte[] b, int ptr) {
		final int sz = b.length;
		int pos = (ptr == 0) ? 46 : ptr; // step over the "tree ..." line
		while (pos < sz) {
			if (b[pos] != 'p')
				break;
			pos += 48; // step over one parent line
		}
		return match(b, pos, author);
	}

	/**
	 * Find where the "committer " header's data begins in a commit buffer.
	 * <p>
	 * Headers are skipped by fixed width: 46 bytes for the "tree ..." line
	 * (only when the scan starts at 0) and 48 bytes for each line starting
	 * with 'p' (a parent). A following line starting with 'a' (the author)
	 * is skipped up to its LF.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position in buffer to start the scan at. Most callers should
	 *            pass 0 to ensure the scan starts from the beginning of the
	 *            commit buffer and does not accidentally look at message body.
	 * @return position just after the space in "committer ", so the first
	 *         character of the committer's name. If no committer header can be
	 *         located -1 is returned.
	 */
	public static final int committer(final byte[] b, int ptr) {
		final int sz = b.length;
		int pos = (ptr == 0) ? 46 : ptr; // step over the "tree ..." line
		while (pos < sz) {
			if (b[pos] != 'p')
				break;
			pos += 48; // step over one parent line
		}
		final boolean atAuthor = pos < sz && b[pos] == 'a';
		if (atAuthor)
			pos = nextLF(b, pos);
		return match(b, pos, committer);
	}

	/**
	 * Find where the "tagger " header's data begins in a tag buffer.
	 * <p>
	 * When the scan starts at 0 the first 48 bytes (the "object ..." line)
	 * are skipped. Each following header line is then tested in turn until
	 * either "tagger " matches or an empty line (end of headers) is reached.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position in buffer to start the scan at. Most callers should
	 *            pass 0 to ensure the scan starts from the beginning of the tag
	 *            buffer and does not accidentally look at message body.
	 * @return position just after the space in "tagger ", so the first
	 *         character of the tagger's name. If no tagger header can be
	 *         located -1 is returned.
	 */
	public static final int tagger(final byte[] b, int ptr) {
		final int sz = b.length;
		int pos = (ptr == 0) ? 48 : ptr; // step over the "object ..." line
		for (; pos < sz; pos = nextLF(b, pos)) {
			if (b[pos] == '\n')
				return -1; // blank line: headers are over
			final int found = match(b, pos, tagger);
			if (found >= 0)
				return found;
		}
		return -1;
	}

	/**
	 * Find where the "encoding " header's data begins.
	 * <p>
	 * Header lines are walked one at a time. The walk stops at the first
	 * line beginning with 'e', which is then compared against "encoding ";
	 * an empty line (end of headers) ends the search without a match.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position in buffer to start the scan at. Most callers should
	 *            pass 0 to ensure the scan starts from the beginning of the
	 *            buffer and does not accidentally look at the message body.
	 * @return position just after the space in "encoding ", so the first
	 *         character of the encoding's name. If no encoding header can be
	 *         located -1 is returned (and UTF-8 should be assumed).
	 */
	public static final int encoding(final byte[] b, int ptr) {
		final int sz = b.length;
		int pos = ptr;
		for (; pos < sz && b[pos] != 'e'; pos = nextLF(b, pos)) {
			if (b[pos] == '\n')
				return -1; // blank line: headers are over
		}
		return match(b, pos, encoding);
	}

	/**
	 * Resolve the character set named by the "encoding " header.
	 * <p>
	 * The header is located with {@link #encoding(byte[], int)}, scanning
	 * from position 0. Its value, up to but excluding the line's LF, is
	 * handed to {@link Charset#forName(String)}.
	 * <p>
	 * If no encoding header is present, {@link Constants#CHARSET} is assumed.
	 *
	 * @param b
	 *            buffer to scan.
	 * @return the Java character set representation. Never null.
	 * @throws java.nio.charset.IllegalCharsetNameException
	 *             if the header value is not a legal charset name.
	 * @throws java.nio.charset.UnsupportedCharsetException
	 *             if this JVM has no support for the named charset.
	 */
	public static Charset parseEncoding(final byte[] b) {
		final int nameStart = encoding(b, 0);
		if (nameStart < 0)
			return Constants.CHARSET;
		final int nameEnd = nextLF(b, nameStart) - 1;
		final String name = decode(Constants.CHARSET, b, nameStart, nameEnd);
		return Charset.forName(name);
	}

	/**
	 * Parse a name line (e.g. author, committer, tagger) into a PersonIdent.
	 * <p>
	 * The line is expected to look like
	 * <code>name &lt;email&gt; seconds timezone</code>. Name and email are
	 * decoded using the character set reported by
	 * {@link #parseEncoding(byte[])}; the timestamp is read as seconds and
	 * converted to milliseconds.
	 * <p>
	 * Callers should obtain <code>nameB</code> from
	 * {@link #author(byte[], int)} or {@link #committer(byte[], int)}, as
	 * these methods provide the proper position within the buffer.
	 *
	 * @param raw
	 *            the buffer to parse character data from.
	 * @param nameB
	 *            first position of the identity information. This should be the
	 *            first position after the space which delimits the header field
	 *            name (e.g. "author" or "committer") from the rest of the
	 *            identity line.
	 * @return the parsed identity. Never null.
	 */
	public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
		final Charset charset = parseEncoding(raw);
		final int addrStart = nextLF(raw, nameB, '<');
		final int addrEnd = nextLF(raw, addrStart, '>');

		// addrStart is one past '<'; back up over "<" and the space before it.
		final String name = decode(charset, raw, nameB, addrStart - 2);
		// addrEnd is one past '>'; exclude the '>' itself.
		final String email = decode(charset, raw, addrStart, addrEnd - 1);

		final MutableInteger cursor = new MutableInteger();
		final long seconds = parseLongBase10(raw, addrEnd + 1, cursor);
		final int zone = parseTimeZoneOffset(raw, cursor.value);

		return new PersonIdent(name, email, seconds * 1000L, zone);
	}

	/**
	 * Parse a name data (e.g. as within a reflog) into a PersonIdent.
	 * <p>
	 * Unlike {@link #parsePersonIdent(byte[], int)} this variant does not
	 * consult an encoding header and tolerates incomplete lines:
	 * <ul>
	 * <li>if no '&gt;' closes the email on this line, the email becomes
	 * "invalid" and both time and timezone are 0;</li>
	 * <li>if no '&lt;' occurs on this line, the name extends to the position
	 * returned by {@link #nextLF(byte[], int)} for the line.</li>
	 * </ul>
	 *
	 * @param raw
	 *            the buffer to parse character data from.
	 * @param nameB
	 *            first position of the identity information. This should be the
	 *            first position after the space which delimits the header field
	 *            name (e.g. "author" or "committer") from the rest of the
	 *            identity line.
	 * @return the parsed identity. Never null.
	 */
	public static PersonIdent parsePersonIdentOnly(final byte[] raw, final int nameB) {
		final int stop = nextLF(raw, nameB);
		final int emailB = nextLF(raw, nameB, '<');
		final int emailE = nextLF(raw, emailB, '>');
		final boolean emailClosed = emailE < stop;

		final String email = emailClosed
				? decode(raw, emailB, emailE - 1)
				: "invalid";
		final String name = decode(raw, nameB, emailB < stop ? emailB - 2 : stop);

		final MutableInteger ptrout = new MutableInteger();
		long when = 0;
		int tz = 0;
		if (emailClosed) {
			when = parseLongBase10(raw, emailE + 1, ptrout);
			tz = parseTimeZoneOffset(raw, ptrout.value);
		}
		return new PersonIdent(name, email, when * 1000L, tz);
	}

	/**
	 * Find the ':' that terminates a footer line key.
	 * <p>
	 * When the bytes starting at {@code raw[ptr]} match
	 * {@code ^[A-Za-z0-9-]+:} (e.g. "Signed-off-by: A. U. Thor\n") the
	 * position of that first ':' is returned.
	 * <p>
	 * In every other case, including running off the end of the buffer, the
	 * result is -1.
	 *
	 * @param raw
	 *            buffer to scan.
	 * @param ptr
	 *            first position within raw to consider as a footer line key.
	 * @return position of the ':' which terminates the footer line key if this
	 *         is otherwise a valid footer line key; otherwise -1.
	 */
	public static int endOfFooterLineKey(final byte[] raw, int ptr) {
		try {
			while (footerLineKeyChars[raw[ptr]] != 0)
				ptr++;
			return raw[ptr] == ':' ? ptr : -1;
		} catch (ArrayIndexOutOfBoundsException e) {
			// Either the buffer ended or the byte lies outside the table.
			return -1;
		}
	}

	/**
	 * Decode an entire buffer, preferring {@link Constants#CHARSET}.
	 * <p>
	 * Equivalent to {@link #decode(byte[], int, int)} over the whole array;
	 * see {@link #decode(Charset, byte[], int, int)} for the fallback rules
	 * applied when the bytes are not valid in that character set.
	 *
	 * @param buffer
	 *            buffer to pull raw bytes from.
	 * @return a string representation of the complete buffer.
	 */
	public static String decode(final byte[] buffer) {
		final int end = buffer.length;
		return decode(buffer, 0, end);
	}

	/**
	 * Decode a region of a buffer, preferring {@link Constants#CHARSET}.
	 * <p>
	 * Delegates to {@link #decode(Charset, byte[], int, int)} with
	 * {@link Constants#CHARSET} as the requested character set; see that
	 * method for the fallback rules.
	 *
	 * @param buffer
	 *            buffer to pull raw bytes from.
	 * @param start
	 *            start position in buffer
	 * @param end
	 *            one position past the last location within the buffer to take
	 *            data from.
	 * @return a string representation of the range <code>[start,end)</code>.
	 */
	public static String decode(final byte[] buffer, final int start,
			final int end) {
		final Charset preferred = Constants.CHARSET;
		return decode(preferred, buffer, start, end);
	}

	/**
	 * Decode an entire buffer, with a caller-suggested character set.
	 * <p>
	 * Equivalent to {@link #decode(Charset, byte[], int, int)} over the
	 * whole array; see that method for the order in which character sets
	 * are tried.
	 *
	 * @param cs
	 *            character set to use when decoding the buffer.
	 * @param buffer
	 *            buffer to pull raw bytes from.
	 * @return a string representation of the complete buffer.
	 */
	public static String decode(final Charset cs, final byte[] buffer) {
		final int end = buffer.length;
		return decode(cs, buffer, 0, end);
	}

	/**
	 * Decode a region of the buffer, never failing.
	 * <p>
	 * Decoding is first attempted through
	 * {@link #decodeNoFallback(Charset, byte[], int, int)}. If every
	 * character set tried there rejects the bytes, the region is converted
	 * with {@link #extractBinaryString(byte[], int, int)}, which maps each
	 * byte to one ISO-8859-1 character so no input is lost.
	 *
	 * @param cs
	 *            character set to use when decoding the buffer.
	 * @param buffer
	 *            buffer to pull raw bytes from.
	 * @param start
	 *            first position within the buffer to take data from.
	 * @param end
	 *            one position past the last location within the buffer to take
	 *            data from.
	 * @return a string representation of the range <code>[start,end)</code>.
	 */
	public static String decode(final Charset cs, final byte[] buffer,
			final int start, final int end) {
		String text;
		try {
			text = decodeNoFallback(cs, buffer, start, end);
		} catch (CharacterCodingException e) {
			// None of the candidate character sets fit; keep every byte by
			// treating the data as ISO-8859-1.
			text = extractBinaryString(buffer, start, end);
		}
		return text;
	}

	/**
	 * Decode a region of the buffer strictly, trying several character sets.
	 * <p>
	 * Candidates are tried in this order, each one rejecting malformed or
	 * unmappable input:
	 * <ol>
	 * <li>{@link Constants#CHARSET};</li>
	 * <li>the caller supplied <code>cs</code>, if different from the
	 * above;</li>
	 * <li>the platform default character set, if different from both of the
	 * above.</li>
	 * </ol>
	 * The first candidate that accepts the whole region wins.
	 *
	 * @param cs
	 *            character set to use when decoding the buffer.
	 * @param buffer
	 *            buffer to pull raw bytes from.
	 * @param start
	 *            first position within the buffer to take data from.
	 * @param end
	 *            one position past the last location within the buffer to take
	 *            data from.
	 * @return a string representation of the range <code>[start,end)</code>,
	 *         after decoding the region through the first character set that
	 *         accepted it.
	 * @throws CharacterCodingException
	 *             the input is not in any of the tested character sets.
	 */
	public static String decodeNoFallback(final Charset cs,
			final byte[] buffer, final int start, final int end)
			throws CharacterCodingException {
		final ByteBuffer region = ByteBuffer.wrap(buffer, start, end - start);
		region.mark();

		// Built-in favorite first. The assumption is that strict decoding
		// fails whenever the data was not actually written in that encoding.
		String text = tryDecode(region, Constants.CHARSET);
		if (text != null)
			return text;

		// Next the caller's suggestion, unless it was just tried.
		if (!cs.equals(Constants.CHARSET)) {
			text = tryDecode(region, cs);
			if (text != null)
				return text;
		}

		// Finally the platform default; a small group of people might
		// actually use the same (or a very similar) locale.
		final Charset platform = Charset.defaultCharset();
		if (!platform.equals(cs) && !platform.equals(Constants.CHARSET)) {
			text = tryDecode(region, platform);
			if (text != null)
				return text;
		}

		throw new CharacterCodingException();
	}

	/**
	 * Strictly decode the buffer with one character set. On failure the
	 * buffer is rewound to its mark and null is returned.
	 */
	private static String tryDecode(final ByteBuffer b, final Charset charset) {
		try {
			return decode(b, charset);
		} catch (CharacterCodingException e) {
			b.reset();
			return null;
		}
	}

	/**
	 * Convert a region of the buffer to a string, one character per byte.
	 * <p>
	 * Every byte is taken as an unsigned value 0-255 and becomes the
	 * character with that code, which is what decoding as ISO-8859-1 does.
	 *
	 * @param buffer
	 *            buffer to pull raw bytes from.
	 * @param start
	 *            first position within the buffer to take data from.
	 * @param end
	 *            one position past the last location within the buffer to take
	 *            data from.
	 * @return a string representation of the range <code>[start,end)</code>.
	 */
	public static String extractBinaryString(final byte[] buffer,
			final int start, final int end) {
		final char[] chars = new char[end - start];
		for (int i = 0; i < chars.length; i++)
			chars[i] = (char) (buffer[start + i] & 0xff);
		return new String(chars);
	}

	/**
	 * Strictly decode the remaining bytes of a buffer with one character
	 * set; malformed or unmappable input is reported, not replaced.
	 */
	private static String decode(final ByteBuffer b, final Charset charset)
			throws CharacterCodingException {
		return charset.newDecoder()
				.onMalformedInput(CodingErrorAction.REPORT)
				.onUnmappableCharacter(CodingErrorAction.REPORT)
				.decode(b)
				.toString();
	}

	/**
	 * Find where the message body of a commit begins.
	 * <p>
	 * The "tree ..." line (46 bytes, only when the scan starts at 0) and
	 * every line starting with 'p' (48 bytes each, a parent) are skipped by
	 * fixed width. The remaining headers are skipped by
	 * {@link #tagMessage(byte[], int)}, whatever their type.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position in buffer to start the scan at. Most callers should
	 *            pass 0 to ensure the scan starts from the beginning of the
	 *            commit buffer.
	 * @return position of the user's message buffer; -1 if the blank line
	 *         separating headers from the message was not found.
	 */
	public static final int commitMessage(final byte[] b, int ptr) {
		final int sz = b.length;
		int pos = (ptr == 0) ? 46 : ptr; // step over the "tree ..." line
		while (pos < sz) {
			if (b[pos] != 'p')
				break;
			pos += 48; // step over one parent line
		}

		// From here on the header walk is the same as for a tag.
		return tagMessage(b, pos);
	}

	/**
	 * Find where the message body of a tag begins.
	 * <p>
	 * When the scan starts at 0 the first 48 bytes (the "object ..." line)
	 * are skipped. Header lines are then skipped one by one until a line
	 * that starts with LF, i.e. the blank separator line, is reached.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param ptr
	 *            position in buffer to start the scan at. Most callers should
	 *            pass 0 to ensure the scan starts from the beginning of the tag
	 *            buffer.
	 * @return position of the user's message buffer; -1 if the blank
	 *         separator line was not found before the end of the buffer.
	 */
	public static final int tagMessage(final byte[] b, int ptr) {
		final int sz = b.length;
		int pos = (ptr == 0) ? 48 : ptr; // step over the "object ..." line
		while (pos < sz && b[pos] != '\n')
			pos = nextLF(b, pos);
		// The loop ends either at the blank line or at the end of the buffer.
		return pos < sz ? pos + 1 : -1;
	}

	/**
	 * Find where the current paragraph ends.
	 * <p>
	 * Two consecutive LF bytes end a paragraph. Lines are skipped until one
	 * begins with LF (or the buffer ends), then the position is moved back
	 * over any LF bytes directly before it, never going below
	 * <code>start</code>.
	 *
	 * @param b
	 *            buffer to scan.
	 * @param start
	 *            position in buffer to start the scan at. Most callers will
	 *            want to pass the first position of the commit message (as
	 *            found by {@link #commitMessage(byte[], int)}.
	 * @return position of the LF at the end of the paragraph;
	 *         <code>b.length</code> if no paragraph end could be located.
	 */
	public static final int endOfParagraph(final byte[] b, final int start) {
		final int sz = b.length;
		int pos = start;
		while (pos < sz && b[pos] != '\n')
			pos = nextLF(b, pos);
		while (pos > 0 && pos > start) {
			if (b[pos - 1] != '\n')
				break;
			pos--;
		}
		return pos;
	}

	/** Private so the class cannot be instantiated from outside. */
	private RawParseUtils() {
		// Don't create instances of a static only utility.
	}
}