From: Kamil Musin Date: Fri, 24 Nov 2023 14:17:26 +0000 (+0100) Subject: Improve footer parsing to allow multiline footers. X-Git-Tag: v6.8.0.202311291450-r~7^2^2 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=340cc787a0d14a0698d757a919ccd77e0a129fb0;p=jgit.git Improve footer parsing to allow multiline footers. According to the https://git-scm.com/docs/git-interpret-trailers the CGit supports multiline trailers. Subsequent lines of such multiline trailers have to start with a whitespace. We also rewrite the original parsing code to make it easier to work with. The old code had pointers moving both backwards and forwards at the same time. In the rewritten code we first find the start of the last paragraph and then do all the parsing. Since all the getters of the FooterLine return String, I've considered rewriting the parsing code to operate on strings. However the original code seems to be written with the idea, that the data is only lazily copied in getters and no extra allocations should be performed during original parsing (ex. during RevWalk). The changed code keeps to this idea. Bug: Google b/312440626 Change-Id: Ie1e3b17a4a5ab767b771c95f00c283ea6c300220 --- diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java index 01f6a3a0a0..303aedcd00 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/FooterLineTest.java @@ -318,6 +318,57 @@ public class FooterLineTest extends RepositoryTestCase { assertFalse("not CC", line.matches(FooterKey.CC)); } + @Test + public void testMultilineFooters() { + String msg = buildMessage("subject\n\nbody of commit\n" + + "Not-A-Footer-Line: this line must not be read as a footer\n" + + "\n" // paragraph break, now footers appear in final block + + "Notes: The change must not be merged until dependency ABC is\n" + + " updated.\n" + + "CC: \n" + + "not really a footer line but we'll skip it anyway\n" + + "Acked-by: Some Reviewer \n"); + List footers = FooterLine.fromMessage(msg); + FooterLine f; + + assertNotNull(footers); + assertEquals(3, footers.size()); + + f = footers.get(0); + assertEquals("Notes", f.getKey()); + assertEquals( + "The change must not be merged until dependency ABC is updated.", + f.getValue()); + + f = footers.get(1); + assertEquals("CC", f.getKey()); + assertEquals("", f.getValue()); + + f = footers.get(2); + assertEquals("Acked-by", f.getKey()); + assertEquals("Some Reviewer ", f.getValue()); + } + + @Test + public void testMultilineFooters_multipleWhitespaceAreAllowed() { + String msg = buildMessage("subject\n\nbody of commit\n" + + "Not-A-Footer-Line: this line must not be read as a footer\n" + + "\n" // paragraph break, now footers appear in final block + + "Notes: The change must not be merged until dependency ABC is\n" + + " updated.\n"); + List footers = FooterLine.fromMessage(msg); + FooterLine f; + + assertNotNull(footers); + assertEquals(1, footers.size()); + + f = footers.get(0); + assertEquals("Notes", f.getKey()); + assertEquals( + "The change must not be merged until dependency ABC is updated.", + f.getValue()); + } + private String buildMessage(String msg) { StringBuilder buf = new StringBuilder(); buf.append("tree " + ObjectId.zeroId().name() + "\n"); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java index 2d396d57c5..cadeaec6e0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FooterLine.java @@ -74,41 +74,42 @@ public final class FooterLine { */ public static List fromMessage( byte[] raw) { - int ptr = raw.length - 1; - while (raw[ptr] == '\n') // trim any trailing LFs, not interesting - ptr--; + // Find the end of the last paragraph. + int parEnd = raw.length; + for (; parEnd > 0 && (raw[parEnd - 1] == '\n' || raw[parEnd - 1] == ' '); --parEnd); int msgB = RawParseUtils.commitMessage(raw, 0); ArrayList r = new ArrayList<>(4); Charset enc = RawParseUtils.guessEncoding(raw); - for (;;) { - ptr = RawParseUtils.prevLF(raw, ptr); - if (ptr <= msgB) - break; // Don't parse commit headers as footer lines. - int keyStart = ptr + 2; - if (raw[keyStart] == '\n') - break; // Stop at first paragraph break, no footers above it. + // Search for the beginning of last paragraph + int parStart = parEnd; + for (; parStart > msgB && (raw[parStart - 1] != '\n' || raw[parStart - 2] != '\n'); --parStart); - int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart); - if (keyEnd < 0) - continue; // Not a well formed footer line, skip it. + for (int ptr = parStart; ptr < parEnd;) { + int keyStart = ptr; + int keyEnd = RawParseUtils.endOfFooterLineKey(raw, ptr); + if (keyEnd < 0) { + // Not a well-formed footer line, skip it. + ptr = RawParseUtils.nextLF(raw, ptr); + continue; + } // Skip over the ': *' at the end of the key before the value. - // - int valStart = keyEnd + 1; - while (valStart < raw.length && raw[valStart] == ' ') - valStart++; - - // Value ends at the LF, and does not include it. - // - int valEnd = RawParseUtils.nextLF(raw, valStart); - if (raw[valEnd - 1] == '\n') - valEnd--; - + int valStart, valEnd; + for (valStart = keyEnd + 1; valStart < raw.length && raw[valStart] == ' '; ++valStart); + + for(ptr = valStart;;) { + ptr = RawParseUtils.nextLF(raw, ptr); + // Next line starts with whitespace for a multiline footer. + if (ptr == raw.length || raw[ptr] != ' ') { + valEnd = raw[ptr - 1] == '\n' ? ptr - 1 : ptr; + break; + } + } r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd)); } - Collections.reverse(r); + return r; } @@ -199,7 +200,7 @@ public final class FooterLine { * character encoding. */ public String getValue() { - return RawParseUtils.decode(enc, buffer, valStart, valEnd); + return RawParseUtils.decode(enc, buffer, valStart, valEnd).replaceAll("\n +", " "); } /** diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java index 0392ea428c..1f0f13152e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java @@ -570,8 +570,9 @@ public class RevCommit extends RevObject { * the order of the line's appearance in the commit message itself. *

* A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while - * the value is free-form, but must not contain an LF. Very common keys seen - * in the wild are: + * the value is free-form. The Value may be split over multiple lines with + * each subsequent line starting with at least one whitespace. Very common + * keys seen in the wild are: *

    *
  • {@code Signed-off-by} (agrees to Developer Certificate of Origin) *
  • {@code Acked-by} (thinks change looks sane in context)