FooterLines: handle extraction from messages without headers

author Nitzan Gur-Furman <nitzan@google.com>

Wed, 6 Dec 2023 13:47:27 +0000 (14:47 +0100)

committer Nitzan Gur-Furman <nitzan@google.com>

Tue, 9 Jan 2024 08:14:06 +0000 (09:14 +0100)
author Nitzan Gur-Furman <nitzan@google.com>
Wed, 6 Dec 2023 13:47:27 +0000 (14:47 +0100)
committer Nitzan Gur-Furman <nitzan@google.com>
Tue, 9 Jan 2024 08:14:06 +0000 (09:14 +0100)
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java

index a6808f635143fa56342eb59ae1c50c6996509a5e..cac0743d689bf6ec58283e0031e23a37bd6ffcd9 100644 (file)
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java
@@ -97,6 +97,15 @@ public class FooterLineTest extends RepositoryTestCase {
                 assertEquals(1, footers.size());
         }
  
+       @Test
+       public void testOneFooter_longSubject_NoHeaders() {
+               String noRawMsg = "50+ chars loooooooooooooong custom commit message.\n\n"
+                               + "Footer: value\n";
+               List<FooterLine> footers = FooterLine.fromMessage(noRawMsg);
+               assertNotNull(footers);
+               assertEquals(1, footers.size());
+       }
+
         @Test
         public void testSignedOffBy_OneUserNoLF() {
                 String msg = buildMessage("subject\n\nbody of commit\n" + "\n"
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java

index d651b63afba7c71c961aa4c1e07afe24ebd3d167..227ea0fd65affc32c8060778c6a08f1d250044f9 100644 (file)
--- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java
@@ -81,7 +81,11 @@ public final class FooterLine {
                         // empty
                 }
  
-               int msgB = RawParseUtils.commitMessage(raw, 0);
+               // The first non-header line is never a footer.
+               int msgB = RawParseUtils.nextLfSkippingSplitLines(raw,
+                               RawParseUtils.hasAnyKnownHeaders(raw)
+                                               ? RawParseUtils.commitMessage(raw, 0)
+                                               : 0);
                 ArrayList<FooterLine> r = new ArrayList<>(4);
                 Charset enc = RawParseUtils.guessEncoding(raw);
  
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java

index 71fae815fe197ac2b8eb6fb25b31c1f8946c80ad..fccdca8b9d7a912d708b9f4518fac39e2c15f5c8 100644 (file)
--- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java
@@ -16,7 +16,12 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  import static org.eclipse.jgit.lib.ObjectChecker.author;
  import static org.eclipse.jgit.lib.ObjectChecker.committer;
  import static org.eclipse.jgit.lib.ObjectChecker.encoding;
+import static org.eclipse.jgit.lib.ObjectChecker.object;
+import static org.eclipse.jgit.lib.ObjectChecker.parent;
+import static org.eclipse.jgit.lib.ObjectChecker.tag;
  import static org.eclipse.jgit.lib.ObjectChecker.tagger;
+import static org.eclipse.jgit.lib.ObjectChecker.tree;
+import static org.eclipse.jgit.lib.ObjectChecker.type;
  
  import java.nio.ByteBuffer;
  import java.nio.charset.CharacterCodingException;
@@ -520,17 +525,24 @@ public final class RawParseUtils {
         }
  
         /**
-        * Locate the end of the header.  Note that headers may be
-        * more than one line long.
+        * Locate the first end of line after the given position, while treating
+        * following lines which are starting with spaces as part of the current
+        * line.
+        * <p>
+        * For example, {@code nextLfSkippingSplitLines(
+        * "row \n with space at beginning of a following line\nThe actual next line",
+        * 0)} will return the position of {@code "\nThe actual next line"}.
+        *
          * @param b
          *            buffer to scan.
          * @param ptr
-        *            position within buffer to start looking for the end-of-header.
-        * @return new position just after the header.  This is either
-        * b.length, or the index of the header's terminating newline.
-        * @since 5.1
+        *            position within buffer to start looking for the next line.
+        * @return new position just after the line end of the last line-split. This
+        *         is either b.length, or the index of the current split-line's
+        *         terminating newline.
+        * @since 6.9
          */
-       public static final int headerEnd(final byte[] b, int ptr) {
+       public static final int nextLfSkippingSplitLines(final byte[] b, int ptr) {
                 final int sz = b.length;
                 while (ptr < sz) {
                         final byte c = b[ptr++];
@@ -541,6 +553,28 @@ public final class RawParseUtils {
                 return ptr - 1;
         }
  
+       /**
+        * Locate the first end of header after the given position. Note that
+        * headers may be more than one line long.
+        * <p>
+        * Also note that there might be multiple headers. If you wish to find the
+        * last header's end - call this in a loop.
+        *
+        * @param b
+        *            buffer to scan.
+        * @param ptr
+        *            position within buffer to start looking for the header
+        *            (normally a new-line).
+        * @return new position just after the line end. This is either b.length, or
+        *         the index of the header's terminating newline.
+        * @since 5.1
+        * @deprecated use {{@link #nextLfSkippingSplitLines}} directly instead
+        */
+       @Deprecated
+       public static final int headerEnd(final byte[] b, int ptr) {
+               return nextLfSkippingSplitLines(b, ptr);
+       }
+
         /**
          * Find the start of the contents of a given header.
          *
@@ -575,6 +609,21 @@ public final class RawParseUtils {
                 return -1;
         }
  
+       /**
+        * Returns whether the message starts with any known headers.
+        *
+        * @param b
+        *            buffer to scan.
+        * @return whether the message starts with any known headers
+        */
+       public static final boolean hasAnyKnownHeaders(byte[] b) {
+               return match(b, 0, tree) != -1 || match(b, 0, parent) != -1
+                               || match(b, 0, author) != -1 || match(b, 0, committer) != -1
+                               || match(b, 0, encoding) != -1 || match(b, 0, object) != -1
+                               || match(b, 0, type) != -1 || match(b, 0, tag) != -1
+                               || match(b, 0, tagger) != -1;
+       }
+
         /**
          * Locate the first position before a given character.
          *
@@ -1258,6 +1307,7 @@ public final class RawParseUtils {
                 final int sz = b.length;
                 if (ptr == 0)
                         ptr += 48; // skip the "object ..." line.
+               // Assume the rest of the current paragraph is all headers.
                 while (ptr < sz && b[ptr] != '\n')
                         ptr = nextLF(b, ptr);
                 if (ptr < sz && b[ptr] == '\n')
author	Nitzan Gur-Furman <nitzan@google.com>
	Wed, 6 Dec 2023 13:47:27 +0000 (14:47 +0100)
committer	Nitzan Gur-Furman <nitzan@google.com>
	Tue, 9 Jan 2024 08:14:06 +0000 (09:14 +0100)
org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java		patch \| blob \| history
org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java		patch \| blob \| history
org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java		patch \| blob \| history