From 4b3c5194acd8a9b18bf269888cab2ea29c376508 Mon Sep 17 00:00:00 2001 From: Kaushik Lingarkar Date: Tue, 19 Nov 2024 09:37:33 -0800 Subject: Support built-in diff drivers for hunk header function names The regexes defined for each built-in driver will be used to determine the function name for a hunk header. Each driver can specify a list of regexes to negate and match. They can also define pattern compilation flags if needed. These drivers only apply to text files with unified patch type. Following built-in drivers have been added: - cpp - dts - java - python - rust Support for more languages can be added as needed to match the cgit implementation. Change-Id: Ice5430bfed7e4aaf9f00e52e44402479984953c5 --- org.eclipse.jgit/.settings/.api_filters | 28 +++++ .../src/org/eclipse/jgit/diff/DiffDriver.java | 116 +++++++++++++++++ .../src/org/eclipse/jgit/diff/DiffFormatter.java | 140 +++++++++++++++++++-- .../eclipse/jgit/diff/PatchIdDiffFormatter.java | 4 +- 4 files changed, 275 insertions(+), 13 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffDriver.java (limited to 'org.eclipse.jgit') diff --git a/org.eclipse.jgit/.settings/.api_filters b/org.eclipse.jgit/.settings/.api_filters index 2c22f0231c..d3ae4cf376 100644 --- a/org.eclipse.jgit/.settings/.api_filters +++ b/org.eclipse.jgit/.settings/.api_filters @@ -1,5 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffDriver.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffDriver.java new file mode 100644 index 0000000000..9ae1aaa298 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffDriver.java @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.diff; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Built-in drivers for various languages, sorted by name. These drivers will be + * used to determine function names for a hunk. + *

+ * When writing or updating patterns, assume the contents are syntactically + * correct. Patterns can be simple and need not cover all syntactical corner + * cases, as long as they are sufficiently permissive. + * + * @since 6.10.1 + */ +@SuppressWarnings({"ImmutableEnumChecker", "nls"}) +public enum DiffDriver { + /** + * Built-in diff driver for c++ + */ + cpp(List.of( + /* Jump targets or access declarations */ + "^[ \\t]*[A-Za-z_][A-Za-z_0-9]*:\\s*($|/[/*])"), List.of( + /* functions/methods, variables, and compounds at top level */ + "^((::\\s*)?[A-Za-z_].*)$")), + /** + * Built-in diff driver for device + * tree files + */ + dts(List.of(";", "="), List.of( + /* lines beginning with a word optionally preceded by '&' or the root */ + "^[ \\t]*((/[ \\t]*\\{|&?[a-zA-Z_]).*)")), + /** + * Built-in diff driver for java + */ + java(List.of( + "^[ \\t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)"), + List.of( + /* Class, enum, interface, and record declarations */ + "^[ \\t]*(([a-z-]+[ \\t]+)*(class|enum|interface|record)[ \\t]+.*)$", + /* Method definitions; note that constructor signatures are not */ + /* matched because they are indistinguishable from method calls. */ + "^[ \\t]*(([A-Za-z_<>&\\]\\[][?&<>.,A-Za-z_0-9]*[ \\t]+)+[A-Za-z_]" + + "[A-Za-z_0-9]*[ \\t]*\\([^;]*)$")), + /** + * Built-in diff driver for python + */ + python(List.of("^[ \\t]*((class|(async[ \\t]+)?def)[ \\t].*)$")), + /** + * Built-in diff driver for java + */ + rust(List.of("^[\\t ]*((pub(\\([^\\)]+\\))?[\\t ]+)?" + + "((async|const|unsafe|extern([\\t ]+\"[^\"]+\"))[\\t ]+)?" + + "(struct|enum|union|mod|trait|fn|impl|macro_rules!)[< \\t]+[^;]*)$")); + + private final List negatePatterns; + + private final List matchPatterns; + + DiffDriver(List negate, List match, int flags) { + if (negate != null) { + this.negatePatterns = negate.stream() + .map(r -> Pattern.compile(r, flags)) + .collect(Collectors.toList()); + } else { + this.negatePatterns = null; + } + this.matchPatterns = match.stream().map(r -> Pattern.compile(r, flags)) + .collect(Collectors.toList()); + } + + DiffDriver(List match) { + this(null, match, 0); + } + + DiffDriver(List negate, List match) { + this(negate, match, 0); + } + + /** + * Returns the list of patterns used to exclude certain lines from being + * considered as function names. + * + * @return the list of negate patterns + */ + public List getNegatePatterns() { + return negatePatterns; + } + + /** + * Returns the list of patterns used to match lines for potential function + * names. + * + * @return the list of match patterns + */ + public List getMatchPatterns() { + return matchPatterns; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index 2f472b5c0a..fa446e18cd 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -30,7 +30,9 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.regex.Pattern; import org.eclipse.jgit.api.errors.CanceledException; +import org.eclipse.jgit.attributes.Attribute; import org.eclipse.jgit.diff.DiffAlgorithm.SupportedAlgorithm; import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.dircache.DirCacheIterator; @@ -703,7 +705,7 @@ public class DiffFormatter implements AutoCloseable { */ public void format(DiffEntry ent) throws IOException { FormatResult res = createFormatResult(ent); - format(res.header, res.a, res.b); + format(res.header, res.a, res.b, getDiffDriver(ent)); } private static byte[] writeGitLinkText(AbbreviatedObjectId id) { @@ -749,11 +751,14 @@ public class DiffFormatter implements AutoCloseable { * text source for the post-image version of the content. This * must match the content of * {@link org.eclipse.jgit.patch.FileHeader#getNewId()}. + * @param diffDriver + * the diff driver used to obtain function names in hunk headers * @throws java.io.IOException - * writing to the supplied stream failed. + * writing to the supplied stream failed. + * @since 6.10.1 */ - public void format(FileHeader head, RawText a, RawText b) - throws IOException { + public void format(FileHeader head, RawText a, RawText b, + DiffDriver diffDriver) throws IOException { // Reuse the existing FileHeader as-is by blindly copying its // header lines, but avoiding its hunks. Instead we recreate // the hunks from the text instances we have been supplied. @@ -763,8 +768,49 @@ public class DiffFormatter implements AutoCloseable { if (!head.getHunks().isEmpty()) end = head.getHunks().get(0).getStartOffset(); out.write(head.getBuffer(), start, end - start); - if (head.getPatchType() == PatchType.UNIFIED) - format(head.toEditList(), a, b); + if (head.getPatchType() == PatchType.UNIFIED) { + format(head.toEditList(), a, b, diffDriver); + } + } + + /** + * Format a patch script, reusing a previously parsed FileHeader. + *

+ * This formatter is primarily useful for editing an existing patch script + * to increase or reduce the number of lines of context within the script. + * All header lines are reused as-is from the supplied FileHeader. + * + * @param head + * existing file header containing the header lines to copy. + * @param a + * text source for the pre-image version of the content. This must match + * the content of {@link org.eclipse.jgit.patch.FileHeader#getOldId()}. + * @param b + * text source for the post-image version of the content. This must match + * the content of {@link org.eclipse.jgit.patch.FileHeader#getNewId()}. + * @throws java.io.IOException + * writing to the supplied stream failed. + */ + public void format(FileHeader head, RawText a, RawText b) + throws IOException { + format(head, a, b, null); + } + + /** + * Formats a list of edits in unified diff format + * + * @param edits + * some differences which have been calculated between A and B + * @param a + * the text A which was compared + * @param b + * the text B which was compared + * @throws java.io.IOException + * if an IO error occurred + */ + public void format(EditList edits, RawText a, RawText b) + throws IOException { + format(edits, a, b, null); } /** @@ -776,11 +822,14 @@ public class DiffFormatter implements AutoCloseable { * the text A which was compared * @param b * the text B which was compared + * @param diffDriver + * the diff driver used to obtain function names in hunk headers * @throws java.io.IOException * if an IO error occurred + * @since 6.10.1 */ - public void format(EditList edits, RawText a, RawText b) - throws IOException { + public void format(EditList edits, RawText a, RawText b, + DiffDriver diffDriver) throws IOException { for (int curIdx = 0; curIdx < edits.size();) { Edit curEdit = edits.get(curIdx); final int endIdx = findCombinedEnd(edits, curIdx); @@ -791,7 +840,8 @@ public class DiffFormatter implements AutoCloseable { final int aEnd = (int) Math.min(a.size(), (long) endEdit.getEndA() + context); final int bEnd = (int) Math.min(b.size(), (long) endEdit.getEndB() + context); - writeHunkHeader(aCur, aEnd, bCur, bEnd); + writeHunkHeader(aCur, aEnd, bCur, bEnd, + getFuncName(a, aCur - 1, diffDriver)); while (aCur < aEnd || bCur < bEnd) { if (aCur < curEdit.getBeginA() || endIdx + 1 < curIdx) { @@ -881,8 +931,30 @@ public class DiffFormatter implements AutoCloseable { * @throws java.io.IOException * if an IO error occurred */ - protected void writeHunkHeader(int aStartLine, int aEndLine, - int bStartLine, int bEndLine) throws IOException { + protected void writeHunkHeader(int aStartLine, int aEndLine, int bStartLine, + int bEndLine) throws IOException { + writeHunkHeader(aStartLine, aEndLine, bStartLine, bEndLine, null); + } + + /** + * Output a hunk header + * + * @param aStartLine + * within first source + * @param aEndLine + * within first source + * @param bStartLine + * within second source + * @param bEndLine + * within second source + * @param funcName + * function name of this hunk + * @throws java.io.IOException + * if an IO error occurred + * @since 6.10.1 + */ + protected void writeHunkHeader(int aStartLine, int aEndLine, int bStartLine, + int bEndLine, String funcName) throws IOException { out.write('@'); out.write('@'); writeRange('-', aStartLine + 1, aEndLine - aStartLine); @@ -890,6 +962,10 @@ public class DiffFormatter implements AutoCloseable { out.write(' '); out.write('@'); out.write('@'); + if (funcName != null) { + out.write(' '); + out.write(funcName.getBytes()); + } out.write('\n'); } @@ -1247,4 +1323,46 @@ public class DiffFormatter implements AutoCloseable { private static boolean end(Edit edit, int a, int b) { return edit.getEndA() <= a && edit.getEndB() <= b; } + + private String getFuncName(RawText text, int startAt, + DiffDriver diffDriver) { + if (diffDriver != null) { + while (startAt > 0) { + String line = text.getString(startAt); + startAt--; + if (matchesAny(diffDriver.getNegatePatterns(), line)) { + continue; + } + if (matchesAny(diffDriver.getMatchPatterns(), line)) { + String funcName = line.replaceAll("^[ \\t]+", ""); + return funcName.substring(0, + Math.min(funcName.length(), 80)).trim(); + } + } + } + return null; + } + + private boolean matchesAny(List patterns, String text) { + if (patterns != null) { + for (Pattern p : patterns) { + if (p.matcher(text).find()) { + return true; + } + } + } + return false; + } + + private DiffDriver getDiffDriver(DiffEntry entry) { + Attribute diffAttr = entry.getDiffAttribute(); + if (diffAttr != null) { + try { + return DiffDriver.valueOf(diffAttr.getValue()); + } catch (IllegalArgumentException e) { + return null; + } + } + return null; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatchIdDiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatchIdDiffFormatter.java index 4343642f9a..b401bbe73d 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatchIdDiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatchIdDiffFormatter.java @@ -44,8 +44,8 @@ public class PatchIdDiffFormatter extends DiffFormatter { } @Override - protected void writeHunkHeader(int aStartLine, int aEndLine, - int bStartLine, int bEndLine) throws IOException { + protected void writeHunkHeader(int aStartLine, int aEndLine, int bStartLine, + int bEndLine, String funcName) throws IOException { // The hunk header is not taken into account for patch id calculation } -- cgit v1.2.3