]> source.dussan.org Git - jgit.git/commitdiff
Improve footer parsing to allow multiline footers. 64/1172664/2
authorKamil Musin <kamilm@google.com>
Fri, 24 Nov 2023 14:17:26 +0000 (15:17 +0100)
committerKamil Musin <kamilm@google.com>
Fri, 24 Nov 2023 14:39:02 +0000 (15:39 +0100)
According to the https://git-scm.com/docs/git-interpret-trailers the
CGit supports multiline trailers. Subsequent lines of such multiline
trailers have to start with a whitespace.

We also rewrite the original parsing code to make it easier to work
with. The old code had pointers moving both backwards and forwards at
the same time. In the rewritten code we first find the start of the last
paragraph and then do all the parsing.

Since all the getters of the FooterLine return String, I've considered
rewriting the parsing code to operate on strings. However the original
code seems to be written with the idea, that the data is only lazily
copied in getters and no extra allocations should be performed during
original parsing (ex. during RevWalk). The changed code keeps to this
idea.

Bug: Google b/312440626
Change-Id: Ie1e3b17a4a5ab767b771c95f00c283ea6c300220

org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java
org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java
org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java

index 01f6a3a0a012c9960b395d1b01dd6b1ab701db64..303aedcd0087e0c7b55a5d8e0d222139b118e534 100644 (file)
@@ -318,6 +318,57 @@ public class FooterLineTest extends RepositoryTestCase {
                assertFalse("not CC", line.matches(FooterKey.CC));
        }
 
+       @Test
+       public void testMultilineFooters() {
+               String msg = buildMessage("subject\n\nbody of commit\n"
+                               + "Not-A-Footer-Line: this line must not be read as a footer\n"
+                               + "\n" // paragraph break, now footers appear in final block
+                               + "Notes: The change must not be merged until dependency ABC is\n"
+                               + " updated.\n"
+                               + "CC:            <some.mailing.list@example.com>\n"
+                               + "not really a footer line but we'll skip it anyway\n"
+                               + "Acked-by: Some Reviewer <sr@example.com>\n");
+               List<FooterLine> footers = FooterLine.fromMessage(msg);
+               FooterLine f;
+
+               assertNotNull(footers);
+               assertEquals(3, footers.size());
+
+               f = footers.get(0);
+               assertEquals("Notes", f.getKey());
+               assertEquals(
+                               "The change must not be merged until dependency ABC is updated.",
+                               f.getValue());
+
+               f = footers.get(1);
+               assertEquals("CC", f.getKey());
+               assertEquals("<some.mailing.list@example.com>", f.getValue());
+
+               f = footers.get(2);
+               assertEquals("Acked-by", f.getKey());
+               assertEquals("Some Reviewer <sr@example.com>", f.getValue());
+       }
+
+       @Test
+       public void testMultilineFooters_multipleWhitespaceAreAllowed() {
+               String msg = buildMessage("subject\n\nbody of commit\n"
+                               + "Not-A-Footer-Line: this line must not be read as a footer\n"
+                               + "\n" // paragraph break, now footers appear in final block
+                               + "Notes: The change must not be merged until dependency ABC is\n"
+                               + "    updated.\n");
+               List<FooterLine> footers = FooterLine.fromMessage(msg);
+               FooterLine f;
+
+               assertNotNull(footers);
+               assertEquals(1, footers.size());
+
+               f = footers.get(0);
+               assertEquals("Notes", f.getKey());
+               assertEquals(
+                               "The change must not be merged until dependency ABC is updated.",
+                               f.getValue());
+       }
+
        private String buildMessage(String msg) {
                StringBuilder buf = new StringBuilder();
                buf.append("tree " + ObjectId.zeroId().name() + "\n");
index 2d396d57c50ff5d2660ae04ef0fafa9aa2df5557..cadeaec6e0a77180abf4c044aea42032b424d10f 100644 (file)
@@ -74,41 +74,42 @@ public final class FooterLine {
         */
        public static List<FooterLine> fromMessage(
                        byte[] raw) {
-               int ptr = raw.length - 1;
-               while (raw[ptr] == '\n') // trim any trailing LFs, not interesting
-                       ptr--;
+               // Find the end of the last paragraph.
+               int parEnd = raw.length;
+               for (; parEnd > 0 && (raw[parEnd - 1] == '\n' || raw[parEnd - 1] == ' '); --parEnd);
 
                int msgB = RawParseUtils.commitMessage(raw, 0);
                ArrayList<FooterLine> r = new ArrayList<>(4);
                Charset enc = RawParseUtils.guessEncoding(raw);
-               for (;;) {
-                       ptr = RawParseUtils.prevLF(raw, ptr);
-                       if (ptr <= msgB)
-                               break; // Don't parse commit headers as footer lines.
 
-                       int keyStart = ptr + 2;
-                       if (raw[keyStart] == '\n')
-                               break; // Stop at first paragraph break, no footers above it.
+               // Search for the beginning of last paragraph
+               int parStart = parEnd;
+               for (; parStart > msgB && (raw[parStart - 1] != '\n' || raw[parStart - 2] != '\n'); --parStart);
 
-                       int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart);
-                       if (keyEnd < 0)
-                               continue; // Not a well formed footer line, skip it.
+               for (int ptr = parStart; ptr < parEnd;) {
+                       int keyStart = ptr;
+                       int keyEnd = RawParseUtils.endOfFooterLineKey(raw, ptr);
+                       if (keyEnd < 0) {
+                               // Not a well-formed footer line, skip it.
+                               ptr = RawParseUtils.nextLF(raw, ptr);
+                               continue;
+                       }
 
                        // Skip over the ': *' at the end of the key before the value.
-                       //
-                       int valStart = keyEnd + 1;
-                       while (valStart < raw.length && raw[valStart] == ' ')
-                               valStart++;
-
-                       // Value ends at the LF, and does not include it.
-                       //
-                       int valEnd = RawParseUtils.nextLF(raw, valStart);
-                       if (raw[valEnd - 1] == '\n')
-                               valEnd--;
-
+                       int valStart, valEnd;
+                       for (valStart = keyEnd + 1; valStart < raw.length && raw[valStart] == ' '; ++valStart);
+
+                       for(ptr = valStart;;) {
+                               ptr = RawParseUtils.nextLF(raw, ptr);
+                               // Next line starts with whitespace for a multiline footer.
+                               if (ptr == raw.length || raw[ptr] != ' ') {
+                                       valEnd = raw[ptr - 1] == '\n' ? ptr - 1 : ptr;
+                                       break;
+                               }
+                       }
                        r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd));
                }
-               Collections.reverse(r);
+
                return r;
        }
 
@@ -199,7 +200,7 @@ public final class FooterLine {
         *         character encoding.
         */
        public String getValue() {
-               return RawParseUtils.decode(enc, buffer, valStart, valEnd);
+               return RawParseUtils.decode(enc, buffer, valStart, valEnd).replaceAll("\n +", " ");
        }
 
        /**
index 0392ea428cdae2d639bf40d9ea6c34c623b49644..1f0f13152eaa354100ba70752ff9db2ca3d65746 100644 (file)
@@ -570,8 +570,9 @@ public class RevCommit extends RevObject {
         * the order of the line's appearance in the commit message itself.
         * <p>
         * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while
-        * the value is free-form, but must not contain an LF. Very common keys seen
-        * in the wild are:
+        * the value is free-form. The Value may be split over multiple lines with
+        * each subsequent line starting with at least one whitespace. Very common
+        * keys seen in the wild are:
         * <ul>
         * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin)
         * <li>{@code Acked-by} (thinks change looks sane in context)