FooterLines: handle extraction from messages without headers

Prior to this change, long subjects of messages with no headers were treated as headers, and therefore were skipped. In a message of the form `<long subject>\n\n<footers>`, the footers would then get parsed as a message, meaning no footers were returned. After this change, the first lines are skipped only if they match any of the known headers. The first line ofter the optional headers is then assumed to be the subject line. `FooterLineTest` had a few test cases for extracting footers from messages with no headers. However, there were all with short messages, so the "skip this line" logic in `RawParseUtils` was never triggered. Added test case to catch this issue. Change-Id: I971a1dddf1a9aea094360c3c8fc3b9a8b011bbf9 Issue: Google b/287891316
author: Nitzan Gur-Furman <nitzan@google.com> 2023-12-06 14:47:27 +0100
committer: Nitzan Gur-Furman <nitzan@google.com> 2024-01-09 09:14:06 +0100
commit: 8116f66b9c8e032ef0a2c14e5b242e710ae0d323 (patch)
tree: 22659c209fdf4ba5f1d8a4896e91bd59b2eea26c /org.eclipse.jgit/src/org/eclipse/jgit/util
parent: 4d2bdba32687b87a18b0b3a8439a573305ace3ea (diff)
download: jgit-8116f66b9c8e032ef0a2c14e5b242e710ae0d323.tar.gz
jgit-8116f66b9c8e032ef0a2c14e5b242e710ae0d323.zip
1 files changed, 57 insertions, 7 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
index 71fae815fe..fccdca8b9d 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
@@ -16,7 +16,12 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.eclipse.jgit.lib.ObjectChecker.author;
 import static org.eclipse.jgit.lib.ObjectChecker.committer;
 import static org.eclipse.jgit.lib.ObjectChecker.encoding;
+import static org.eclipse.jgit.lib.ObjectChecker.object;
+import static org.eclipse.jgit.lib.ObjectChecker.parent;
+import static org.eclipse.jgit.lib.ObjectChecker.tag;
 import static org.eclipse.jgit.lib.ObjectChecker.tagger;
+import static org.eclipse.jgit.lib.ObjectChecker.tree;
+import static org.eclipse.jgit.lib.ObjectChecker.type;
 
 import java.nio.ByteBuffer;
 import java.nio.charset.CharacterCodingException;
@@ -520,17 +525,24 @@ public final class RawParseUtils {
 	}
 
 	/**
-	 * Locate the end of the header.  Note that headers may be
-	 * more than one line long.
+	 * Locate the first end of line after the given position, while treating
+	 * following lines which are starting with spaces as part of the current
+	 * line.
+	 * <p>
+	 * For example, {@code nextLfSkippingSplitLines(
+	 * "row \n with space at beginning of a following line\nThe actual next line",
+	 * 0)} will return the position of {@code "\nThe actual next line"}.
+	 *
 	 * @param b
 	 *            buffer to scan.
 	 * @param ptr
-	 *            position within buffer to start looking for the end-of-header.
-	 * @return new position just after the header.  This is either
-	 * b.length, or the index of the header's terminating newline.
-	 * @since 5.1
+	 *            position within buffer to start looking for the next line.
+	 * @return new position just after the line end of the last line-split. This
+	 *         is either b.length, or the index of the current split-line's
+	 *         terminating newline.
+	 * @since 6.9
 	 */
-	public static final int headerEnd(final byte[] b, int ptr) {
+	public static final int nextLfSkippingSplitLines(final byte[] b, int ptr) {
 		final int sz = b.length;
 		while (ptr < sz) {
 			final byte c = b[ptr++];
@@ -542,6 +554,28 @@ public final class RawParseUtils {
 	}
 
 	/**
+	 * Locate the first end of header after the given position. Note that
+	 * headers may be more than one line long.
+	 * <p>
+	 * Also note that there might be multiple headers. If you wish to find the
+	 * last header's end - call this in a loop.
+	 *
+	 * @param b
+	 *            buffer to scan.
+	 * @param ptr
+	 *            position within buffer to start looking for the header
+	 *            (normally a new-line).
+	 * @return new position just after the line end. This is either b.length, or
+	 *         the index of the header's terminating newline.
+	 * @since 5.1
+	 * @deprecated use {{@link #nextLfSkippingSplitLines}} directly instead
+	 */
+	@Deprecated
+	public static final int headerEnd(final byte[] b, int ptr) {
+		return nextLfSkippingSplitLines(b, ptr);
+	}
+
+	/**
 	 * Find the start of the contents of a given header.
 	 *
 	 * @param b
@@ -576,6 +610,21 @@ public final class RawParseUtils {
 	}
 
 	/**
+	 * Returns whether the message starts with any known headers.
+	 *
+	 * @param b
+	 *            buffer to scan.
+	 * @return whether the message starts with any known headers
+	 */
+	public static final boolean hasAnyKnownHeaders(byte[] b) {
+		return match(b, 0, tree) != -1 || match(b, 0, parent) != -1
+				|| match(b, 0, author) != -1 || match(b, 0, committer) != -1
+				|| match(b, 0, encoding) != -1 || match(b, 0, object) != -1
+				|| match(b, 0, type) != -1 || match(b, 0, tag) != -1
+				|| match(b, 0, tagger) != -1;
+	}
+
+	/**
 	 * Locate the first position before a given character.
 	 *
 	 * @param b
@@ -1258,6 +1307,7 @@ public final class RawParseUtils {
 		final int sz = b.length;
 		if (ptr == 0)
 			ptr += 48; // skip the "object ..." line.
+		// Assume the rest of the current paragraph is all headers.
 		while (ptr < sz && b[ptr] != '\n')
 			ptr = nextLF(b, ptr);
 		if (ptr < sz && b[ptr] == '\n')
author	Nitzan Gur-Furman <nitzan@google.com>	2023-12-06 14:47:27 +0100
committer	Nitzan Gur-Furman <nitzan@google.com>	2024-01-09 09:14:06 +0100
commit	8116f66b9c8e032ef0a2c14e5b242e710ae0d323 (patch)
tree	22659c209fdf4ba5f1d8a4896e91bd59b2eea26c /org.eclipse.jgit/src/org/eclipse/jgit/util
parent	4d2bdba32687b87a18b0b3a8439a573305ace3ea (diff)
download	jgit-8116f66b9c8e032ef0a2c14e5b242e710ae0d323.tar.gz jgit-8116f66b9c8e032ef0a2c14e5b242e710ae0d323.zip