/* * Copyright (C) 2010, 2021 Google Inc. and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at * https://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.diff; import java.io.BufferedInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.ObjectStream; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.WorkingTreeIterator; import org.eclipse.jgit.treewalk.filter.PathFilter; /** * Supplies the content of a file for * {@link org.eclipse.jgit.diff.DiffFormatter}. *
* A content source is not thread-safe. Sources may contain state, including * information about the last ObjectLoader they returned. Callers must be * careful to ensure there is no more than one ObjectLoader pending on any * source, at any time. */ public abstract class ContentSource { /** * Construct a content source for an ObjectReader. * * @param reader * the reader to obtain blobs from. * @return a source wrapping the reader. */ public static ContentSource create(ObjectReader reader) { return new ObjectReaderSource(reader); } /** * Construct a content source for a working directory. * * If the iterator is a {@link org.eclipse.jgit.treewalk.FileTreeIterator} * an optimized version is used that doesn't require seeking through a * TreeWalk. * * @param iterator * the iterator to obtain source files through. * @return a content source wrapping the iterator. */ public static ContentSource create(WorkingTreeIterator iterator) { return new WorkingTreeSource(iterator); } /** * Determine the size of the object. * * @param path * the path of the file, relative to the root of the repository. * @param id * blob id of the file, if known. * @return the size in bytes. * @throws java.io.IOException * the file cannot be accessed. */ public abstract long size(String path, ObjectId id) throws IOException; /** * Open the object. * * @param path * the path of the file, relative to the root of the repository. * @param id * blob id of the file, if known. * @return a loader that can supply the content of the file. The loader must * be used before another loader can be obtained from this same * source. * @throws java.io.IOException * the file cannot be accessed. */ public abstract ObjectLoader open(String path, ObjectId id) throws IOException; /** * Closes the used resources like ObjectReader, TreeWalk etc. Default * implementation does nothing. * * @since 6.2 */ public void close() { // Do nothing } /** * Checks if the source is from "working tree", so it can be accessed as a * file directly. 
* * @since 6.2 * * @return true if working tree source and false otherwise (loader must be * used) */ public boolean isWorkingTreeSource() { return false; } private static class ObjectReaderSource extends ContentSource { private final ObjectReader reader; ObjectReaderSource(ObjectReader reader) { this.reader = reader; } @Override public long size(String path, ObjectId id) throws IOException { try { return reader.getObjectSize(id, Constants.OBJ_BLOB); } catch (MissingObjectException ignore) { return 0; } } @Override public ObjectLoader open(String path, ObjectId id) throws IOException { return reader.open(id, Constants.OBJ_BLOB); } @Override public void close() { reader.close(); } @Override public boolean isWorkingTreeSource() { return false; } } private static class WorkingTreeSource extends ContentSource { private final TreeWalk tw; private final WorkingTreeIterator iterator; private String current; WorkingTreeIterator ptr; WorkingTreeSource(WorkingTreeIterator iterator) { this.tw = new TreeWalk(iterator.getRepository(), (ObjectReader) null); this.tw.setRecursive(true); this.iterator = iterator; } @Override public long size(String path, ObjectId id) throws IOException { seek(path); return ptr.getEntryLength(); } @Override public ObjectLoader open(String path, ObjectId id) throws IOException { seek(path); long entrySize = ptr.getEntryContentLength(); return new ObjectLoader() { @Override public long getSize() { return entrySize; } @Override public int getType() { return ptr.getEntryFileMode().getObjectType(); } @Override public ObjectStream openStream() throws MissingObjectException, IOException { long contentLength = entrySize; InputStream in = ptr.openEntryStream(); in = new BufferedInputStream(in); return new ObjectStream.Filter(getType(), contentLength, in); } @Override public boolean isLarge() { return true; } @Override public byte[] getCachedBytes() throws LargeObjectException { throw new LargeObjectException(); } }; } private void seek(String path) throws IOException { if (!path.equals(current)) { iterator.reset(); // Possibly this iterator had an associated DirCacheIterator, // but we have no access to it and thus don't know about it. // We have to reset this iterator here to work without // DirCacheIterator and to descend always into ignored // directories. Otherwise we might not find tracked files below // ignored folders. Since we're looking only for a single // specific path this is not a performance problem. iterator.setWalkIgnoredDirectories(true); iterator.setDirCacheIterator(null, -1); tw.reset(); tw.addTree(iterator); tw.setFilter(PathFilter.create(path)); current = path; if (!tw.next()) throw new FileNotFoundException(path); ptr = tw.getTree(0, WorkingTreeIterator.class); if (ptr == null) throw new FileNotFoundException(path); } } @Override public void close() { tw.close(); } @Override public boolean isWorkingTreeSource() { return true; } } /** A pair of sources to access the old and new sides of a DiffEntry. */ public static final class Pair { private final ContentSource oldSource; private final ContentSource newSource; /** * Construct a pair of sources. * * @param oldSource * source to read the old side of a DiffEntry. * @param newSource * source to read the new side of a DiffEntry. */ public Pair(ContentSource oldSource, ContentSource newSource) { this.oldSource = oldSource; this.newSource = newSource; } /** * Determine the size of the object. * * @param side * which side of the entry to read (OLD or NEW). * @param ent * the entry to examine. 
* @return the size in bytes. * @throws IOException * the file cannot be accessed. */ public long size(DiffEntry.Side side, DiffEntry ent) throws IOException { switch (side) { case OLD: return oldSource.size(ent.oldPath, ent.oldId.toObjectId()); case NEW: return newSource.size(ent.newPath, ent.newId.toObjectId()); default: throw new IllegalArgumentException(); } } /** * Open the object. * * @param side * which side of the entry to read (OLD or NEW). * @param ent * the entry to examine. * @return a loader that can supply the content of the file. The loader * must be used before another loader can be obtained from this * same source. * @throws IOException * the file cannot be accessed. */ public ObjectLoader open(DiffEntry.Side side, DiffEntry ent) throws IOException { switch (side) { case OLD: return oldSource.open(ent.oldPath, ent.oldId.toObjectId()); case NEW: return newSource.open(ent.newPath, ent.newId.toObjectId()); default: throw new IllegalArgumentException(); } } /** * Closes used resources. * * @since 6.2 */ public void close() { oldSource.close(); newSource.close(); } /** * Checks if source (side) is a "working tree". * * @since 6.2 * * @param side * which side of the entry to read (OLD or NEW). * @return is the source a "working tree" * */ public boolean isWorkingTreeSource(DiffEntry.Side side) { switch (side) { case OLD: return oldSource.isWorkingTreeSource(); case NEW: return newSource.isWorkingTreeSource(); default: throw new IllegalArgumentException(); } } } }
/* * Copyright (C) 2010, Google Inc. and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at * https://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.diff; /** * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an * {@link org.eclipse.jgit.diff.EditList} of changes. *
* An algorithm's {@code diff} method must be callable from concurrent threads
* without data collisions. This permits some algorithms to use a singleton
* pattern, with concurrent invocations using the same singleton. Other
* algorithms may support parameterization, in which case the caller can create
* a unique instance per thread.
*/
public abstract class DiffAlgorithm {
/**
* Supported diff algorithm
*/
public enum SupportedAlgorithm {
/**
* Myers diff algorithm
*/
MYERS,
/**
* Histogram diff algorithm
*/
HISTOGRAM
}
/**
* Get diff algorithm
*
* @param alg
* the diff algorithm for which an implementation should be
* returned
* @return an implementation of the specified diff algorithm
*/
public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
switch (alg) {
case MYERS:
return MyersDiff.INSTANCE;
case HISTOGRAM:
return new HistogramDiff();
default:
throw new IllegalArgumentException();
}
}
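	/*
	 * A minimal usage sketch, assuming the compared content is line-oriented
	 * text already held in memory (the byte arrays and values below are
	 * illustrative, not part of this class; StandardCharsets is
	 * java.nio.charset.StandardCharsets):
	 *
	 *   byte[] oldBytes = "a\nb\nc\n".getBytes(StandardCharsets.UTF_8);
	 *   byte[] newBytes = "a\nx\nc\n".getBytes(StandardCharsets.UTF_8);
	 *   DiffAlgorithm alg = DiffAlgorithm
	 *       .getAlgorithm(SupportedAlgorithm.HISTOGRAM);
	 *   EditList edits = alg.diff(RawTextComparator.DEFAULT,
	 *       new RawText(oldBytes), new RawText(newBytes));
	 *   // edits holds one REPLACE edit covering line index 1 on both sides
	 */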
/**
* Compare two sequences and identify a list of edits between them.
*
* @param <S>
* type of sequence being compared.
* @param cmp
* the comparator supplying the element equivalence function.
* @param a
* the first (also known as old or pre-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
* {@link org.eclipse.jgit.diff.Edit#getEndA()}.
* @param b
* the second (also known as new or post-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
* {@link org.eclipse.jgit.diff.Edit#getEndB()}.
* @return a modifiable edit list comparing the two sequences. If empty, the
* sequences are identical according to {@code cmp}'s rules. The
* result list is never null.
*/
public <S extends Sequence> EditList diff(
		SequenceComparator<? super S> cmp, S a, S b) {
Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
switch (region.getType()) {
case INSERT:
case DELETE:
return EditList.singleton(region);
case REPLACE: {
if (region.getLengthA() == 1 && region.getLengthB() == 1)
return EditList.singleton(region);
SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp);
Subsequence<S> as = Subsequence.a(a, region);
Subsequence<S> bs = Subsequence.b(b, region);
EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
return normalize(cmp, e, a, b);
}
case EMPTY:
return new EditList(0);
default:
throw new IllegalStateException();
}
}
private static <S extends Sequence> Edit coverEdit(S a, S b) {
return new Edit(0, a.size(), 0, b.size());
}
/**
* Reorganize an {@link EditList} for better diff consistency.
*
* {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
* {@link Edit.Type#DELETE} edits that can be "shifted". For
* example, the deleted section
* <pre>
* -a
* -b
* -c
*  a
*  b
*  c
* </pre>
* can be shifted down by 1, 2 or 3 locations.
*
* To avoid later merge issues, we shift such edits to a
* consistent location. {@code normalize} uses a simple strategy of
* shifting such edits to their latest possible location.
*
* This strategy may not always produce an aesthetically pleasing
* diff. For instance, it works well with
* <pre>
* function1 {
*  ...
* }
*
* +function2 {
* + ...
* +}
* +
* function3 {
*  ...
* }
* </pre>
* but less so for
* <pre>
*  #
*  # comment1
*  #
*  function1() {
*  }
*
*  #
* +# comment3
* +#
* +function3() {
* +}
* +
* +#
*  # comment2
*  #
*  function2() {
*  }
* </pre>
* More sophisticated strategies are possible, say by calculating a
* suitable "aesthetic cost" for each possible position and using
* the lowest cost, but {@code normalize} just shifts edits
* to the end as much as possible.
*
* @param
* When writing or updating patterns, assume the contents are syntactically
* correct. Patterns can be simple and need not cover all syntactical corner
* cases, as long as they are sufficiently permissive.
*
* @since 6.10.1
*/
@SuppressWarnings({"ImmutableEnumChecker", "nls"})
public enum DiffDriver {
/**
* Built-in diff driver for c++
*/
cpp(List.of(
/* Jump targets or access declarations */
"^[ \\t]*[A-Za-z_][A-Za-z_0-9]*:\\s*($|/[/*])"), List.of(
/* functions/methods, variables, and compounds at top level */
"^((::\\s*)?[A-Za-z_].*)$")),
/**
* Built-in diff driver for device
* tree files
*/
dts(List.of(";", "="), List.of(
/* lines beginning with a word optionally preceded by '&' or the root */
"^[ \\t]*((/[ \\t]*\\{|&?[a-zA-Z_]).*)")),
/**
* Built-in diff driver for java
*/
java(List.of(
"^[ \\t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)"),
List.of(
/* Class, enum, interface, and record declarations */
"^[ \\t]*(([a-z-]+[ \\t]+)*(class|enum|interface|record)[ \\t]+.*)$",
/* Method definitions; note that constructor signatures are not */
/* matched because they are indistinguishable from method calls. */
"^[ \\t]*(([A-Za-z_<>&\\]\\[][?&<>.,A-Za-z_0-9]*[ \\t]+)+[A-Za-z_]"
+ "[A-Za-z_0-9]*[ \\t]*\\([^;]*)$")),
/**
* Built-in diff driver for
* python
*/
python(List.of("^[ \\t]*((class|(async[ \\t]+)?def)[ \\t].*)$")),
/**
* Built-in diff driver for
* rust
*/
rust(List.of("^[\\t ]*((pub(\\([^\\)]+\\))?[\\t ]+)?"
+ "((async|const|unsafe|extern([\\t ]+\"[^\"]+\"))[\\t ]+)?"
+ "(struct|enum|union|mod|trait|fn|impl|macro_rules!)[< \\t]+[^;]*)$"));
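	/*
	 * A sketch of how one of these hunk header patterns behaves, using the
	 * java driver's declaration regex with plain java.util.regex (the sample
	 * lines are illustrative):
	 *
	 *   Pattern decl = Pattern.compile(
	 *       "^[ \\t]*(([a-z-]+[ \\t]+)*(class|enum|interface|record)[ \\t]+.*)$");
	 *   decl.matcher("public class RenameDetector {").find();   // true
	 *   decl.matcher("    int score = 0;").find();              // false
	 */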
private final List
* The meaning of the old name can differ depending on the semantic meaning
* of this patch:
*
* The meaning of the new name can differ depending on the semantic meaning
* of this patch:
*
* Note that 0 corresponds to filterA because it was the first filter that
* was passed to scan.
*
* To query more than one flag at once, see {@link #getTreeFilterMarks()}.
*
* @param index
* the index of the tree filter to check for (must be between 0
* and {@link java.lang.Integer#SIZE}).
* @since 2.3
* @return a boolean.
*/
public boolean isMarked(int index) {
return (treeFilterMarks & (1L << index)) != 0;
}
/**
* Get the raw tree filter marks, as set during
* {@link #scan(TreeWalk, boolean, TreeFilter...)}. See
* {@link #isMarked(int)} to query each mark individually.
*
* @return the bitset of tree filter marks
* @since 2.3
*/
public int getTreeFilterMarks() {
return treeFilterMarks;
}
/**
* Get the object id.
*
* @param side
* the side of the id to get.
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getId(Side side) {
return side == Side.OLD ? getOldId() : getNewId();
}
@SuppressWarnings("nls")
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append("DiffEntry[");
buf.append(changeType);
buf.append(" ");
switch (changeType) {
case ADD:
buf.append(newPath);
break;
case COPY:
buf.append(oldPath + "->" + newPath);
break;
case DELETE:
buf.append(oldPath);
break;
case MODIFY:
buf.append(oldPath);
break;
case RENAME:
buf.append(oldPath + "->" + newPath);
break;
}
buf.append("]");
return buf.toString();
}
}
/*
* Copyright (C) 2009, Google Inc.
* Copyright (C) 2008-2020, Johannes E. Schindelin
* By default the setting of git config {@code core.quotePath} is active,
* but this can be overridden through this method.
*
* Release the internal ObjectReader state.
*
* @since 4.0
*/
@Override
public void close() {
if (reader != null && closeReader) {
reader.close();
}
}
/**
* Determine the differences between two trees.
*
* No output is created; instead, only the file paths that are different are
* returned. Callers may choose to format these paths themselves, or convert
* them into {@link org.eclipse.jgit.patch.FileHeader} instances with a
* complete edit list by calling {@link #toFileHeader(DiffEntry)}.
*
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @return the paths that are different.
* @throws java.io.IOException
* trees cannot be read or file contents cannot be read.
*/
public List
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @return the paths that are different.
* @throws java.io.IOException
* trees cannot be read or file contents cannot be read.
*/
public List
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @throws java.io.IOException
* trees cannot be read, file contents cannot be read, or the
* patch cannot be output.
*/
public void format(AnyObjectId a, AnyObjectId b) throws IOException {
format(scan(a, b));
}
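	/*
	 * A typical end-to-end sketch, assuming an open Repository named repo and
	 * two resolved commit or tree ids oldId and newId (all three are
	 * hypothetical placeholders here):
	 *
	 *   try (DiffFormatter fmt = new DiffFormatter(System.out)) {
	 *       fmt.setRepository(repo);
	 *       List<DiffEntry> entries = fmt.scan(oldId, newId);
	 *       fmt.format(entries); // writes a unified diff to System.out
	 *   }
	 */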
/**
* Format the differences between two trees.
*
* The patch is expressed as instructions to modify {@code a} to make it
* {@code b}.
*
*
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @throws java.io.IOException
* trees cannot be read, file contents cannot be read, or the
* patch cannot be output.
*/
public void format(RevTree a, RevTree b) throws IOException {
format(scan(a, b));
}
/**
* Format the differences between two trees.
*
* The patch is expressed as instructions to modify {@code a} to make it
* {@code b}.
*
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @throws java.io.IOException
* trees cannot be read, file contents cannot be read, or the
* patch cannot be output.
*/
public void format(AbstractTreeIterator a, AbstractTreeIterator b)
throws IOException {
format(scan(a, b));
}
/**
* Format a patch script from a list of difference entries. Requires
* {@link #scan(AbstractTreeIterator, AbstractTreeIterator)} to have been
* called first.
*
* @param entries
* entries describing the affected files.
* @throws java.io.IOException
* a file's content cannot be read, or the output stream cannot
* be written to.
*/
public void format(List<? extends DiffEntry> entries) throws IOException {
for (DiffEntry ent : entries)
format(ent);
}
/**
* Format a patch script for one file entry.
*
* @param ent
* the entry to be formatted.
* @throws java.io.IOException
* a file's content cannot be read, or the output stream cannot
* be written to.
*/
public void format(DiffEntry ent) throws IOException {
FormatResult res = createFormatResult(ent);
format(res.header, res.a, res.b, getDiffDriver(ent));
}
private static byte[] writeGitLinkText(AbbreviatedObjectId id) {
if (ObjectId.zeroId().equals(id.toObjectId())) {
return EMPTY;
}
return encodeASCII("Subproject commit " + id.name() //$NON-NLS-1$
+ "\n"); //$NON-NLS-1$
}
private String format(AbbreviatedObjectId id) {
if (id.isComplete() && reader != null) {
try {
id = reader.abbreviate(id.toObjectId(), abbreviationLength);
} catch (IOException cannotAbbreviate) {
// Ignore this. We'll report the full identity.
}
}
return id.name();
}
private String quotePath(String path) {
if (quotePaths == null || quotePaths.booleanValue()) {
return QuotedString.GIT_PATH.quote(path);
}
return QuotedString.GIT_PATH_MINIMAL.quote(path);
}
/**
* Format a patch script, reusing a previously parsed FileHeader.
*
* This formatter is primarily useful for editing an existing patch script
* to increase or reduce the number of lines of context within the script.
* All header lines are reused as-is from the supplied FileHeader.
*
* @param head
* existing file header containing the header lines to copy.
* @param a
* text source for the pre-image version of the content. This
* must match the content of
* {@link org.eclipse.jgit.patch.FileHeader#getOldId()}.
* @param b
* text source for the post-image version of the content. This
* must match the content of
* {@link org.eclipse.jgit.patch.FileHeader#getNewId()}.
* @param diffDriver
* the diff driver used to obtain function names in hunk headers
* @throws java.io.IOException
* writing to the supplied stream failed.
* @since 6.10.1
*/
public void format(FileHeader head, RawText a, RawText b,
DiffDriver diffDriver) throws IOException {
// Reuse the existing FileHeader as-is by blindly copying its
// header lines, but avoiding its hunks. Instead we recreate
// the hunks from the text instances we have been supplied.
//
final int start = head.getStartOffset();
int end = head.getEndOffset();
if (!head.getHunks().isEmpty())
end = head.getHunks().get(0).getStartOffset();
out.write(head.getBuffer(), start, end - start);
if (head.getPatchType() == PatchType.UNIFIED) {
format(head.toEditList(), a, b, diffDriver);
}
}
/**
* Format a patch script, reusing a previously parsed FileHeader.
*
* This formatter is primarily useful for editing an existing patch script
* to increase or reduce the number of lines of context within the script.
* All header lines are reused as-is from the supplied FileHeader.
*
* @param head
* existing file header containing the header lines to copy.
* @param a
* text source for the pre-image version of the content. This must match
* the content of {@link org.eclipse.jgit.patch.FileHeader#getOldId()}.
* @param b
* text source for the post-image version of the content. This must match
* the content of {@link org.eclipse.jgit.patch.FileHeader#getNewId()}.
* @throws java.io.IOException
* writing to the supplied stream failed.
*/
public void format(FileHeader head, RawText a, RawText b)
throws IOException {
format(head, a, b, null);
}
/**
* Formats a list of edits in unified diff format
*
* @param edits
* some differences which have been calculated between A and B
* @param a
* the text A which was compared
* @param b
* the text B which was compared
* @throws java.io.IOException
* if an IO error occurred
*/
public void format(EditList edits, RawText a, RawText b)
throws IOException {
format(edits, a, b, null);
}
/**
* Formats a list of edits in unified diff format
*
* @param edits
* some differences which have been calculated between A and B
* @param a
* the text A which was compared
* @param b
* the text B which was compared
* @param diffDriver
* the diff driver used to obtain function names in hunk headers
* @throws java.io.IOException
* if an IO error occurred
* @since 6.10.1
*/
public void format(EditList edits, RawText a, RawText b,
DiffDriver diffDriver) throws IOException {
for (int curIdx = 0; curIdx < edits.size();) {
Edit curEdit = edits.get(curIdx);
final int endIdx = findCombinedEnd(edits, curIdx);
final Edit endEdit = edits.get(endIdx);
int aCur = (int) Math.max(0, (long) curEdit.getBeginA() - context);
int bCur = (int) Math.max(0, (long) curEdit.getBeginB() - context);
final int aEnd = (int) Math.min(a.size(), (long) endEdit.getEndA() + context);
final int bEnd = (int) Math.min(b.size(), (long) endEdit.getEndB() + context);
writeHunkHeader(aCur, aEnd, bCur, bEnd,
getFuncName(a, aCur - 1, diffDriver));
while (aCur < aEnd || bCur < bEnd) {
if (aCur < curEdit.getBeginA() || endIdx + 1 < curIdx) {
writeContextLine(a, aCur);
if (isEndOfLineMissing(a, aCur))
out.write(noNewLine);
aCur++;
bCur++;
} else if (aCur < curEdit.getEndA()) {
writeRemovedLine(a, aCur);
if (isEndOfLineMissing(a, aCur))
out.write(noNewLine);
aCur++;
} else if (bCur < curEdit.getEndB()) {
writeAddedLine(b, bCur);
if (isEndOfLineMissing(b, bCur))
out.write(noNewLine);
bCur++;
}
if (end(curEdit, aCur, bCur) && ++curIdx < edits.size())
curEdit = edits.get(curIdx);
}
}
}
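	/*
	 * A sketch of formatting a raw EditList without any tree scanning,
	 * assuming a and b are the RawText inputs that were compared and out is
	 * any OutputStream (all illustrative):
	 *
	 *   EditList edits = new HistogramDiff().diff(RawTextComparator.DEFAULT, a, b);
	 *   try (DiffFormatter fmt = new DiffFormatter(out)) {
	 *       fmt.format(edits, a, b); // hunk headers and context come from a and b
	 *   }
	 */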
/**
* Output a line of context (unmodified line).
*
* @param text
* RawText for accessing raw data
* @param line
* the line number within text
* @throws java.io.IOException
* if an IO error occurred
*/
protected void writeContextLine(RawText text, int line)
throws IOException {
writeLine(' ', text, line);
}
private static boolean isEndOfLineMissing(RawText text, int line) {
return line + 1 == text.size() && text.isMissingNewlineAtEnd();
}
/**
* Output an added line.
*
* @param text
* RawText for accessing raw data
* @param line
* the line number within text
* @throws java.io.IOException
* if an IO error occurred
*/
protected void writeAddedLine(RawText text, int line)
throws IOException {
writeLine('+', text, line);
}
/**
* Output a removed line
*
* @param text
* RawText for accessing raw data
* @param line
* the line number within text
* @throws java.io.IOException
* if an IO error occurred
*/
protected void writeRemovedLine(RawText text, int line)
throws IOException {
writeLine('-', text, line);
}
/**
* Output a hunk header
*
* @param aStartLine
* within first source
* @param aEndLine
* within first source
* @param bStartLine
* within second source
* @param bEndLine
* within second source
* @throws java.io.IOException
* if an IO error occurred
*/
protected void writeHunkHeader(int aStartLine, int aEndLine, int bStartLine,
int bEndLine) throws IOException {
writeHunkHeader(aStartLine, aEndLine, bStartLine, bEndLine, null);
}
/**
* Output a hunk header
*
* @param aStartLine
* within first source
* @param aEndLine
* within first source
* @param bStartLine
* within second source
* @param bEndLine
* within second source
* @param funcName
* function name of this hunk
* @throws java.io.IOException
* if an IO error occurred
* @since 6.10.1
*/
protected void writeHunkHeader(int aStartLine, int aEndLine, int bStartLine,
int bEndLine, String funcName) throws IOException {
out.write('@');
out.write('@');
writeRange('-', aStartLine + 1, aEndLine - aStartLine);
writeRange('+', bStartLine + 1, bEndLine - bStartLine);
out.write(' ');
out.write('@');
out.write('@');
if (funcName != null) {
out.write(' ');
out.write(funcName.getBytes());
}
out.write('\n');
}
private void writeRange(char prefix, int begin, int cnt)
throws IOException {
out.write(' ');
out.write(prefix);
switch (cnt) {
case 0:
// If the range is empty, its beginning number must be the
// line just before the range, or 0 if the range is at the
// start of the file stream. Here, begin is always 1 based,
// so an empty file would produce "0,0".
//
out.write(encodeASCII(begin - 1));
out.write(',');
out.write('0');
break;
case 1:
// If the range is exactly one line, produce only the number.
//
out.write(encodeASCII(begin));
break;
default:
out.write(encodeASCII(begin));
out.write(',');
out.write(encodeASCII(cnt));
break;
}
}
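	/*
	 * Worked example for the two methods above: a hunk covering lines [0,3)
	 * of A and [0,4) of B, with a (hypothetical) function name, produces
	 *
	 *   writeHunkHeader(0, 3, 0, 4, "main()")  ->  "@@ -1,3 +1,4 @@ main()"
	 *
	 * and an empty range is printed against the line before it, e.g.
	 * writeRange('+', 1, 0) emits " +0,0".
	 */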
/**
* Write a standard patch script line.
*
* @param prefix
* prefix before the line, typically '-', '+', ' '.
* @param text
* the text object to obtain the line from.
* @param cur
* line number to output.
* @throws java.io.IOException
* the stream threw an exception while writing to it.
*/
protected void writeLine(final char prefix, final RawText text,
final int cur) throws IOException {
out.write(prefix);
text.writeLine(out, cur);
out.write('\n');
}
/**
* Creates a {@link org.eclipse.jgit.patch.FileHeader} representing the
* given {@link org.eclipse.jgit.diff.DiffEntry}
*
* This method does not use the OutputStream associated with this
* DiffFormatter instance. It is therefore safe to instantiate this
* DiffFormatter instance with a
* {@link org.eclipse.jgit.util.io.DisabledOutputStream} if this method is
* the only one that will be used.
*
* @param ent
* the DiffEntry to create the FileHeader for
* @return a FileHeader representing the DiffEntry. The FileHeader's buffer
* will contain only the header of the diff output. It will also
* contain one {@link org.eclipse.jgit.patch.HunkHeader}.
* @throws java.io.IOException
* the stream threw an exception while writing to it, or one of
* the blobs referenced by the DiffEntry could not be read.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* one of the blobs referenced by the DiffEntry is corrupt.
* @throws org.eclipse.jgit.errors.MissingObjectException
* one of the blobs referenced by the DiffEntry is missing.
*/
public FileHeader toFileHeader(DiffEntry ent) throws IOException,
CorruptObjectException, MissingObjectException {
return createFormatResult(ent).header;
}
private static class FormatResult {
FileHeader header;
RawText a;
RawText b;
}
private FormatResult createFormatResult(DiffEntry ent) throws IOException,
CorruptObjectException, MissingObjectException {
final FormatResult res = new FormatResult();
ByteArrayOutputStream buf = new ByteArrayOutputStream();
final EditList editList;
final FileHeader.PatchType type;
formatHeader(buf, ent);
if (ent.getOldId() == null || ent.getNewId() == null) {
// Content not changed (e.g. only mode, pure rename)
editList = new EditList();
type = PatchType.UNIFIED;
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
assertHaveReader();
RawText aRaw = null;
RawText bRaw = null;
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
aRaw = new RawText(writeGitLinkText(ent.getOldId()));
bRaw = new RawText(writeGitLinkText(ent.getNewId()));
} else {
try {
aRaw = open(OLD, ent);
bRaw = open(NEW, ent);
} catch (BinaryBlobException e) {
// Do nothing; we check for null below.
formatOldNewPaths(buf, ent);
buf.write(encodeASCII("Binary files differ\n")); //$NON-NLS-1$
editList = new EditList();
type = PatchType.BINARY;
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
}
res.a = aRaw;
res.b = bRaw;
editList = diff(res.a, res.b);
type = PatchType.UNIFIED;
switch (ent.getChangeType()) {
case RENAME:
case COPY:
if (!editList.isEmpty())
formatOldNewPaths(buf, ent);
break;
default:
formatOldNewPaths(buf, ent);
break;
}
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
private EditList diff(RawText a, RawText b) {
return diffAlgorithm.diff(comparator, a, b);
}
private void assertHaveReader() {
if (reader == null) {
throw new IllegalStateException(JGitText.get().readerIsRequired);
}
}
private RawText open(DiffEntry.Side side, DiffEntry entry)
throws IOException, BinaryBlobException {
if (entry.getMode(side) == FileMode.MISSING)
return RawText.EMPTY_TEXT;
if (entry.getMode(side).getObjectType() != Constants.OBJ_BLOB)
return RawText.EMPTY_TEXT;
AbbreviatedObjectId id = entry.getId(side);
if (!id.isComplete()) {
Collection
* An edit covers the modified region only. It does not cover a common region.
*
* Regions should be specified using 0 based notation, so add 1 to the start and
* end marks for line numbers in a file.
*
* An edit where {@code beginA == endA && beginB < endB} is an insert edit, that
* is sequence B inserted the elements in region
* An edit where {@code beginA < endA && beginB == endB} is a delete edit, that
* is sequence B has removed the elements between
* An edit where {@code beginA < endA && beginB < endB} is a replace edit, that
* is sequence B has replaced the range of elements between
*
* This sequence acts as a proxy for the real sequence, caching element hash
* codes so they don't need to be recomputed each time. Sequences of this type
* must be used with a {@link org.eclipse.jgit.diff.HashedSequenceComparator}.
*
* To construct an instance of this type use
* {@link org.eclipse.jgit.diff.HashedSequencePair}.
*
* @param
* This comparator acts as a proxy for the real comparator, evaluating the
* cached hash code before testing the underlying comparator's equality.
* Comparators of this type must be used with a
* {@link org.eclipse.jgit.diff.HashedSequence}.
*
* To construct an instance of this type use
* {@link org.eclipse.jgit.diff.HashedSequencePair}.
*
* @param
* This pair wraps two sequences that contain cached hash codes for the input
* sequences.
*
* @param
* This implementation was derived by using the 4 rules that are outlined in
* Bram Cohen's blog,
* and then was further extended to support low-occurrence common elements.
*
* The basic idea of the algorithm is to create a histogram of occurrences for
* each element of sequence A. Each element of sequence B is then considered in
* turn. If the element also exists in sequence A, and has a lower occurrence
* count, the positions are considered as a candidate for the longest common
* subsequence (LCS). After scanning of B is complete the LCS that has the
* lowest number of occurrences is chosen as a split point. The region is split
* around the LCS, and the algorithm is recursively applied to the sections
* before and after the LCS.
*
* By always selecting a LCS position with the lowest occurrence count, this
* algorithm behaves exactly like Bram Cohen's patience diff whenever there is a
* unique common element available between the two sequences. When no unique
* elements exist, the lowest occurrence element is chosen instead. This offers
* more readable diffs than simply falling back on the standard Myers' O(ND)
* algorithm would produce.
*
* To prevent the algorithm from having an O(N^2) running time, an upper limit
* on the number of unique elements in a histogram bucket is configured by
* {@link #setMaxChainLength(int)}. If sequence A has more than this many
* elements that hash into the same hash bucket, the algorithm passes the region
* to {@link #setFallbackAlgorithm(DiffAlgorithm)}. If no fallback algorithm is
* configured, the region is emitted as a replace edit.
*
* During scanning of sequence B, any element of A that occurs more than
* {@link #setMaxChainLength(int)} times is never considered for an LCS match
* position, even if it is common between the two sequences. This limits the
* number of locations in sequence A that must be considered to find the LCS,
* and helps maintain a lower running time bound.
*
* So long as {@link #setMaxChainLength(int)} is a small constant (such as 64),
* the algorithm runs in O(N * D) time, where N is the sum of the input lengths
* and D is the number of edits in the resulting EditList. If the supplied
* {@link org.eclipse.jgit.diff.SequenceComparator} has a good hash function,
* this implementation typically out-performs
* {@link org.eclipse.jgit.diff.MyersDiff}, even though its theoretical running
* time is the same.
*
* This implementation has an internal limitation that prevents it from handling
* sequences with more than 268,435,456 (2^28) elements.
*/
public class HistogramDiff extends LowLevelDiffAlgorithm {
/** Algorithm to use when there are too many element occurrences. */
DiffAlgorithm fallback = MyersDiff.INSTANCE;
/**
* Maximum number of positions to consider for a given element hash.
*
* All elements with the same hash are stored into a single chain. The chain
* size is capped to ensure search is linear time at O(len_A + len_B) rather
* than quadratic at O(len_A * len_B).
*/
int maxChainLength = 64;
/**
* Set the algorithm used when there are too many element occurrences.
*
* @param alg
* the secondary algorithm. If null the region will be denoted as
* a single REPLACE block.
*/
public void setFallbackAlgorithm(DiffAlgorithm alg) {
fallback = alg;
}
/**
* Maximum number of positions to consider for a given element hash.
*
* All elements with the same hash are stored into a single chain. The chain
* size is capped to ensure search is linear time at O(len_A + len_B) rather
* than quadratic at O(len_A * len_B).
*
* @param maxLen
* new maximum length.
*/
public void setMaxChainLength(int maxLen) {
maxChainLength = maxLen;
}
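	/*
	 * A configuration sketch, assuming a and b are previously built RawText
	 * inputs (illustrative); a null fallback makes oversized hash chains
	 * degrade to a single REPLACE edit instead of re-running Myers on the
	 * region:
	 *
	 *   HistogramDiff diff = new HistogramDiff();
	 *   diff.setFallbackAlgorithm(null);
	 *   diff.setMaxChainLength(64);
	 *   EditList edits = diff.diff(RawTextComparator.DEFAULT, a, b);
	 */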
@Override
public
* Each element in the range being considered is put into a hash table, tracking
* the number of times that distinct element appears in the sequence. Once all
* elements have been inserted from sequence A, each element of sequence B is
* probed in the hash table and the longest common subsequence with the lowest
* occurrence count in A is used as the result.
*
* @param
* The basic idea is to put the line numbers of text A as columns ("x") and the
* lines of text B as rows ("y"). Now you try to find the shortest "edit path"
* from the upper left corner to the lower right corner, where you can always go
* horizontally or vertically, but diagonally from (x,y) to (x+1,y+1) only if
* line x in text A is identical to line y in text B.
*
* Myers' fundamental concept is the "furthest reaching D-path on diagonal k": a
* D-path is an edit path starting at the upper left corner and containing
* exactly D non-diagonal elements ("differences"). The furthest reaching D-path
* on diagonal k is the one that contains the most (diagonal) elements which
* ends on diagonal k (where k = y - x).
*
* Example:
*
*
* Since every D-path has exactly D horizontal or vertical elements, it can only
* end on the diagonals -D, -D+2, ..., D-2, D.
*
* Since every furthest reaching D-path contains at least one furthest reaching
* (D-1)-path (except for D=0), we can construct them recursively.
*
* Since we are really interested in the shortest edit path, we can start
* looking for a 0-path, then a 1-path, and so on, until we find a path that
* ends in the lower right corner.
*
* To save space, we do not need to store all paths (which has quadratic space
* requirements), but generate the D-paths simultaneously from both sides. When
* the ends meet, we will have found "the middle" of the path. From the end
* points of that diagonal part, we can generate the rest recursively.
*
* This only requires linear space.
*
* The overall (runtime) complexity is:
*
*
* (With each step, we have to find the middle parts of twice as many regions as
* before, but the regions (as well as the D) are halved.)
*
* So the overall runtime complexity stays the same with linear space, albeit
* with a larger constant factor.
*
* @param
* Elements of the sequence are the lines of the file, as delimited by the UNIX
* newline character ('\n'). The file content is treated as 8 bit binary text,
* with no assumptions or requirements on character encoding.
*
* Note that the first line of the file is element 0, as defined by the Sequence
* interface API. Traditionally, in a text editor or a patch file, the first line is
* line number 1. Callers may need to subtract 1 prior to invoking methods if
* they are converting from "line number" to "element index".
*/
public class RawText extends Sequence {
/** A RawText of length 0 */
public static final RawText EMPTY_TEXT = new RawText(new byte[0]);
/**
* Default and minimum for {@link #BUFFER_SIZE}.
*/
private static final int FIRST_FEW_BYTES = 8 * 1024;
/**
* Number of bytes to check for heuristics in {@link #isBinary(byte[])}.
*/
private static final AtomicInteger BUFFER_SIZE = new AtomicInteger(
FIRST_FEW_BYTES);
/** The file content for this sequence. */
protected final byte[] content;
/** Map of line number to starting position within {@link #content}. */
protected final IntList lines;
/**
* Create a new sequence from an existing content byte array.
*
* The entire array (indexes 0 through length-1) is used as the content.
*
* @param input
* the content array. The object retains a reference to this
* array, so it should be immutable.
*/
public RawText(byte[] input) {
this(input, RawParseUtils.lineMap(input, 0, input.length));
}
/**
* Create a new sequence from the existing content byte array and the line
* map indicating line boundaries.
*
* @param input
* the content array. The object retains a reference to this
* array, so it should be immutable.
* @param lineMap
* an array with 1-based offsets for the start of each line.
* The first and last entries should be {@link Integer#MIN_VALUE}
* and an offset one past the end of the last line, respectively.
* @since 5.0
*/
public RawText(byte[] input, IntList lineMap) {
content = input;
lines = lineMap;
}
/**
* Create a new sequence from a file.
*
* The entire file contents are used.
*
* @param file
* the text file.
* @throws java.io.IOException
* if Exceptions occur while reading the file
*/
public RawText(File file) throws IOException {
this(IO.readFully(file));
}
/**
* Get the raw content
*
* @return the raw, unprocessed content read.
* @since 4.11
*/
public byte[] getRawContent() {
return content;
}
/** @return total number of items in the sequence. */
@Override
public int size() {
// The line map is always 2 entries larger than the number of lines in
// the file. Index 0 is padded out/unused. The last index is the total
// length of the buffer, and acts as a sentinel.
//
return lines.size() - 2;
}
/**
* Write a specific line to the output stream, without its trailing LF.
*
* The specified line is copied as-is, with no character encoding
* translation performed.
*
* If the specified line ends with an LF ('\n'), the LF is not
* copied. It is up to the caller to write the LF, if desired, between
* output lines.
*
* @param out
* stream to copy the line data onto.
* @param i
* index of the line to extract. Note this is 0-based, so line
* number 1 is actually index 0.
* @throws java.io.IOException
* the stream write operation failed.
*/
public void writeLine(OutputStream out, int i)
throws IOException {
int start = getStart(i);
int end = getEnd(i);
if (content[end - 1] == '\n')
end--;
out.write(content, start, end - start);
}
/**
* Determine if the file ends with a LF ('\n').
*
* @return true if the last line has an LF; false otherwise.
*/
public boolean isMissingNewlineAtEnd() {
final int end = lines.get(lines.size() - 1);
if (end == 0)
return true;
return content[end - 1] != '\n';
}
/**
* Get the text for a single line.
*
* @param i
* index of the line to extract. Note this is 0-based, so line
* number 1 is actually index 0.
* @return the text for the line, without a trailing LF.
*/
public String getString(int i) {
return getString(i, i + 1, true);
}
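	/*
	 * A small behavioral sketch for the accessors above, assuming UTF-8 input
	 * (StandardCharsets is java.nio.charset.StandardCharsets):
	 *
	 *   RawText t = new RawText("a\nb\n".getBytes(StandardCharsets.UTF_8));
	 *   t.size();                  // 2
	 *   t.getString(0);            // "a", without the trailing LF
	 *   t.isMissingNewlineAtEnd(); // false, the content ends with '\n'
	 */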
/**
* Get the raw text for a single line.
*
* @param i
* index of the line to extract. Note this is 0-based, so line
* number 1 is actually index 0.
* @return the text for the line, without a trailing LF, as a
* {@link ByteBuffer} that is backed by a slice of the
* {@link #getRawContent() raw content}, with the buffer's position
* on the start of the line and the limit at the end.
* @since 5.12
*/
public ByteBuffer getRawString(int i) {
int s = getStart(i);
int e = getEnd(i);
if (e > 0 && content[e - 1] == '\n') {
e--;
}
return ByteBuffer.wrap(content, s, e - s);
}
/**
* Get the text for a region of lines.
*
* @param begin
* index of the first line to extract. Note this is 0-based, so
* line number 1 is actually index 0.
* @param end
* index of one past the last line to extract.
* @param dropLF
* if true the trailing LF ('\n') of the last returned line is
* dropped, if present.
* @return the text for lines {@code [begin, end)}.
*/
public String getString(int begin, int end, boolean dropLF) {
if (begin == end)
return ""; //$NON-NLS-1$
int s = getStart(begin);
int e = getEnd(end - 1);
if (dropLF && content[e - 1] == '\n')
e--;
return decode(s, e);
}
/**
* Decode a region of the text into a String.
*
* The default implementation of this method tries to guess the character
* set by considering UTF-8, the platform default, and falling back on
* ISO-8859-1 if neither of those can correctly decode the region given.
*
* @param start
* first byte of the content to decode.
* @param end
* one past the last byte of the content to decode.
* @return the region {@code [start, end)} decoded as a String.
*/
protected String decode(int start, int end) {
return RawParseUtils.decode(content, start, end);
}
private int getStart(int i) {
return lines.get(i + 1);
}
private int getEnd(int i) {
return lines.get(i + 2);
}
/**
* Obtains the buffer size to use for analyzing whether certain content is
* text or binary, or what line endings are used if it's text.
*
* @return the buffer size, by default {@link #FIRST_FEW_BYTES} bytes
* @since 6.0
*/
public static int getBufferSize() {
return BUFFER_SIZE.get();
}
/**
* Sets the buffer size to use for analyzing whether certain content is text
* or binary, or what line endings are used if it's text. If the given
* {@code bufferSize} is smaller than {@link #FIRST_FEW_BYTES} set the
* buffer size to {@link #FIRST_FEW_BYTES}.
*
* @param bufferSize
* Size to set
* @return the size actually set
* @since 6.0
*/
public static int setBufferSize(int bufferSize) {
int newSize = Math.max(FIRST_FEW_BYTES, bufferSize);
return BUFFER_SIZE.updateAndGet(curr -> newSize);
}
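	/*
	 * A sketch of the clamping behavior above (values are illustrative):
	 *
	 *   RawText.setBufferSize(1024);      // returns 8192, clamped to the minimum
	 *   RawText.setBufferSize(64 * 1024); // returns 65536
	 */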
/**
* Determine heuristically whether the bytes contained in a stream
* represents binary (as opposed to text) content.
*
* Note: Do not further use this stream after having called this method! The
* stream may not be fully read and will be left at an unknown position
* after consuming an unknown number of bytes. The caller is responsible for
* closing the stream.
*
* @param raw
* input stream containing the raw file content.
* @return true if raw is likely to be a binary file, false otherwise
* @throws java.io.IOException
* if input stream could not be read
*/
public static boolean isBinary(InputStream raw) throws IOException {
final byte[] buffer = new byte[getBufferSize() + 1];
int cnt = 0;
while (cnt < buffer.length) {
final int n = raw.read(buffer, cnt, buffer.length - cnt);
if (n == -1) {
break;
}
cnt += n;
}
return isBinary(buffer, cnt, cnt < buffer.length);
}
/**
* Determine heuristically whether a byte array represents binary (as
* opposed to text) content.
*
* @param raw
* the raw file content.
* @return true if raw is likely to be a binary file, false otherwise
*/
public static boolean isBinary(byte[] raw) {
return isBinary(raw, raw.length);
}
/**
* Determine heuristically whether a byte array represents binary (as
* opposed to text) content.
*
* @param raw
* the raw file content.
* @param length
* number of bytes in {@code raw} to evaluate. This should be
* {@code raw.length} unless {@code raw} was over-allocated by
* the caller.
* @return true if raw is likely to be a binary file, false otherwise
*/
public static boolean isBinary(byte[] raw, int length) {
return isBinary(raw, length, false);
}
/**
* Determine heuristically whether a byte array represents binary (as
* opposed to text) content.
*
* @param raw
* the raw file content.
* @param length
* number of bytes in {@code raw} to evaluate. This should be
* {@code raw.length} unless {@code raw} was over-allocated by
* the caller.
* @param complete
* whether {@code raw} contains the whole data
* @return true if raw is likely to be a binary file, false otherwise
* @since 6.0
*/
public static boolean isBinary(byte[] raw, int length, boolean complete) {
// Similar heuristic as C Git. Differences:
// - limited buffer size; may be only the beginning of a large blob
// - no counting of printable vs. non-printable bytes < 0x20 and 0x7F
int maxLength = getBufferSize();
boolean isComplete = complete;
if (length > maxLength) {
// We restrict the length in all cases to getBufferSize() to get
// predictable behavior. Sometimes we load streams, and sometimes we
// have the full data in memory. With streams, we never look at more
// than the first getBufferSize() bytes. If we looked at more when
// we have the full data, different code paths in JGit might come to
// different conclusions.
length = maxLength;
isComplete = false;
}
int ptr = -1;
byte current;
while (ptr < length - 2) {
current = raw[++ptr];
if (current == '\0' || (current == '\r' && raw[++ptr] != '\n')) {
return true;
}
}
if (ptr == length - 2) {
// If the last byte inspected is NUL, or a trailing CR in complete content, treat it as binary.
current = raw[++ptr];
return current == '\0' || (current == '\r' && isComplete);
}
return false;
}
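	/*
	 * A behavioral sketch for the heuristic above (inputs are illustrative;
	 * StandardCharsets is java.nio.charset.StandardCharsets):
	 *
	 *   isBinary(new byte[] { 'a', 0, 'b' }, 3, true);   // true: NUL byte found
	 *   byte[] text = "line one\nline two\n".getBytes(StandardCharsets.UTF_8);
	 *   isBinary(text, text.length, true);               // false: plain LF text
	 *   isBinary(new byte[] { 'a', '\r' }, 2, true);     // true: lone CR at the end
	 */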
/**
* Determines from the last two bytes read from a source if it looks like
* binary content.
*
* @param curr
* the last byte, read after {@code prev}
* @param prev
* the previous byte, read before {@code last}
* @return {@code true} if either byte is NUL, or if prev is CR and curr is
* not LF, {@code false} otherwise
* @since 6.0
*/
public static boolean isBinary(byte curr, byte prev) {
return curr == '\0' || (curr != '\n' && prev == '\r') || prev == '\0';
}
/**
* Determine heuristically whether a byte array represents text content
* using CR-LF as line separator.
*
* @param raw
* the raw file content.
* @return {@code true} if raw is likely to be CR-LF delimited text,
* {@code false} otherwise
* @since 5.3
*/
public static boolean isCrLfText(byte[] raw) {
return isCrLfText(raw, raw.length);
}
/**
* Determine heuristically whether the bytes contained in a stream represent
* text content using CR-LF as line separator.
*
* Note: Do not further use this stream after having called this method! The
* stream may not be fully read and will be left at an unknown position
* after consuming an unknown number of bytes. The caller is responsible for
* closing the stream.
*
* @param raw
* input stream containing the raw file content.
* @return {@code true} if raw is likely to be CR-LF delimited text,
* {@code false} otherwise
* @throws java.io.IOException
* if input stream could not be read
* @since 5.3
*/
public static boolean isCrLfText(InputStream raw) throws IOException {
byte[] buffer = new byte[getBufferSize()];
int cnt = 0;
while (cnt < buffer.length) {
int n = raw.read(buffer, cnt, buffer.length - cnt);
if (n == -1) {
break;
}
cnt += n;
}
return isCrLfText(buffer, cnt);
}
/**
* Determine heuristically whether a byte array represents text content
* using CR-LF as line separator.
*
* @param raw
* the raw file content.
* @param length
* number of bytes in {@code raw} to evaluate.
* @return {@code true} if raw is likely to be CR-LF delimited text,
* {@code false} otherwise
* @since 5.3
*/
public static boolean isCrLfText(byte[] raw, int length) {
return isCrLfText(raw, length, false);
}
/**
* Determine heuristically whether a byte array represents text content
* using CR-LF as line separator.
*
* @param raw
* the raw file content.
* @param length
* number of bytes in {@code raw} to evaluate.
* @return {@code true} if raw is likely to be CR-LF delimited text,
* {@code false} otherwise
* @param complete
* whether {@code raw} contains the whole data
* @since 6.0
*/
public static boolean isCrLfText(byte[] raw, int length, boolean complete) {
boolean has_crlf = false;
int ptr = -1;
byte current;
while (ptr < length - 2) {
current = raw[++ptr];
if (current == '\0') {
return false;
}
if (current == '\r') {
if (raw[++ptr] != '\n') {
return false;
}
has_crlf = true;
}
}
if (ptr == length - 2) {
// A trailing NUL, or a bare CR at the end of complete content, means this is not CR-LF text.
current = raw[++ptr];
if (current == '\0' || (current == '\r' && complete)) {
return false;
}
}
return has_crlf;
}
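	/*
	 * A behavioral sketch (illustrative inputs, treated as complete content;
	 * StandardCharsets is java.nio.charset.StandardCharsets):
	 *
	 *   byte[] crlf = "a\r\nb\r\n".getBytes(StandardCharsets.US_ASCII);
	 *   isCrLfText(crlf, crlf.length, true);   // true
	 *   byte[] lf = "a\nb\n".getBytes(StandardCharsets.US_ASCII);
	 *   isCrLfText(lf, lf.length, true);       // false: LF only, no CR-LF pairs
	 *   byte[] cr = "a\rb\n".getBytes(StandardCharsets.US_ASCII);
	 *   isCrLfText(cr, cr.length, true);       // false: bare CR is not CR-LF
	 */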
/**
* Get the line delimiter for the first line.
*
* @since 2.0
* @return the line delimiter or
* When comparing two files together their score must be greater than or
* equal to the rename score for them to be considered a rename match. The
* score is computed based on content similarity, so a score of 60 implies
* that approximately 60% of the bytes in the files are identical.
*
* @param score
* new rename score, must be within [0, 100].
* @throws java.lang.IllegalArgumentException
* the score was not within [0, 100].
*/
public void setRenameScore(int score) {
if (score < 0 || score > 100)
throw new IllegalArgumentException(
JGitText.get().similarityScoreMustBeWithinBounds);
renameScore = score;
}
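	/*
	 * A configuration sketch, assuming this setter belongs to
	 * org.eclipse.jgit.diff.RenameDetector and that repo and changes (a
	 * Repository and a List<DiffEntry> from DiffFormatter#scan) already
	 * exist:
	 *
	 *   RenameDetector rd = new RenameDetector(repo);
	 *   rd.setRenameScore(60);                   // require >= 60% similarity
	 *   rd.addAll(changes);
	 *   List<DiffEntry> resolved = rd.compute(); // ADD/DELETE pairs may become RENAMEs
	 */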
/**
* Get break score
*
* @return the similarity score required to keep modified file pairs
* together. Any modify pairs that score below this will be broken
* apart into separate add/deletes. Values less than or equal to
* zero indicate that no modifies will be broken apart. Values over
* 100 cause all modify pairs to be broken.
*/
public int getBreakScore() {
return breakScore;
}
/**
* Set break score
*
* @param breakScore
* the similarity score required to keep modified file pairs
* together. Any modify pairs that score below this will be
* broken apart into separate add/deletes. Values less than or
* equal to zero indicate that no modifies will be broken apart.
* Values over 100 cause all modify pairs to be broken.
*/
public void setBreakScore(int breakScore) {
this.breakScore = breakScore;
}
/**
* Get rename limit
*
* @return limit on number of paths to perform inexact rename detection
*/
public int getRenameLimit() {
return renameLimit;
}
/**
* Set the limit on the number of files to perform inexact rename detection.
*
* The rename detector has to build a square matrix of the rename limit on
* each side, then perform that many file compares to determine similarity.
* If 1000 files are added, and 1000 files are deleted, a 1000*1000 matrix
* must be allocated, and 1,000,000 file compares may need to be performed.
*
* @param limit
* new file limit. 0 means no limit; a negative number means no
* inexact rename detection will be performed, only exact rename
* detection.
*/
public void setRenameLimit(int limit) {
renameLimit = limit;
}
/**
* Get file size threshold for detecting renames. Files larger
* than this size will not be processed for rename detection.
*
* @return threshold in bytes of the file size.
* @since 5.12
*/
public int getBigFileThreshold() { return bigFileThreshold; }
/**
* Set the file size threshold for detecting renames. Files larger than this
* threshold will be skipped during rename detection computation.
*
* @param threshold file size threshold in bytes.
* @since 5.12
*/
public void setBigFileThreshold(int threshold) {
this.bigFileThreshold = threshold;
}
/**
* Get skipping detecting content renames for binary files.
*
* @return true if content renames should be skipped for binary files, false otherwise.
* @since 5.12
*/
public boolean getSkipContentRenamesForBinaryFiles() {
return skipContentRenamesForBinaryFiles;
}
/**
* Sets skipping detecting content renames for binary files.
*
* @param value true if content renames should be skipped for binary files, false otherwise.
* @since 5.12
*/
public void setSkipContentRenamesForBinaryFiles(boolean value) {
this.skipContentRenamesForBinaryFiles = value;
}
/**
* Check if the detector is over the rename limit.
*
* This method can be invoked either before or after {@code getEntries} has
* been used to perform rename detection.
*
* @return true if the detector has more file additions or removals than the
* rename limit is currently set to. In such configurations the
* detector will skip expensive computation.
*/
public boolean isOverRenameLimit() {
if (done)
return overRenameLimit;
int cnt = Math.max(added.size(), deleted.size());
return getRenameLimit() != 0 && getRenameLimit() < cnt;
}
/**
* Add entries to be considered for rename detection.
*
* @param entriesToAdd
* one or more entries to add.
* @throws java.lang.IllegalStateException
* if {@code getEntries} was already invoked.
*/
public void addAll(Collection
* This convenience function runs without a progress monitor.
*
* A sequence of elements is defined to contain elements in the index range
*
* Implementations of Sequence are primarily intended for use in content
* difference detection algorithms, to produce an
* {@link org.eclipse.jgit.diff.EditList} of {@link org.eclipse.jgit.diff.Edit}
* instances describing how two Sequence instances differ.
*
* To be compared against another Sequence of the same type, a supporting
* {@link org.eclipse.jgit.diff.SequenceComparator} must also be supplied.
*/
public abstract class Sequence {
/**
 * Get size
 *
 * @return total number of items in the sequence.
 */
public abstract int size();
}
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Equivalence function for a {@link org.eclipse.jgit.diff.Sequence} compared by
* difference algorithm.
*
* Difference algorithms can use a comparator to compare portions of two
* sequences and discover the minimal edits required to transform from one
* sequence to the other sequence.
*
* Indexes within a sequence are zero-based.
*
 */

/**
 * Index structure of lines/blocks in one file.
 *
 * This structure can be used to compute an approximation of the similarity
* between two files. The index is used by
* {@link org.eclipse.jgit.diff.SimilarityRenameDetector} to compute scores
* between files.
*
* To save space in memory, this index uses a space efficient encoding which
* will not exceed 1 MiB per instance. The index starts out at a smaller size
* (closer to 2 KiB), but may grow as more distinct blocks within the scanned
* file are discovered.
*
* @since 4.0
*/
public class SimilarityIndex {
/** A special {@link TableFullException} used in place of OutOfMemoryError. */
public static final TableFullException
TABLE_FULL_OUT_OF_MEMORY = new TableFullException();
/**
* Shift to apply before storing a key.
*
* Within the 64 bit table record space, we leave the highest bit unset so
	 * all values are positive. The lower 32 bits are used to count bytes.
*/
private static final int KEY_SHIFT = 32;
/** Maximum value of the count field, also mask to extract the count. */
private static final long MAX_COUNT = (1L << KEY_SHIFT) - 1;
/**
	 * Total number of bytes hashed into the structure, including \n. This is
	 * usually the size of the file minus the number of CRLF sequences, as the
	 * CR of a CRLF pair is not hashed.
*/
private long hashedCnt;
/** Number of non-zero entries in {@link #idHash}. */
private int idSize;
/** {@link #idSize} that triggers {@link #idHash} to double in size. */
private int idGrowAt;
/**
* Pairings of content keys and counters.
*
	 * Slots in the table are actually two ints wedged into a single long. The
	 * upper 32 bits store the content key, and the lower 32 bits store the
	 * number of bytes associated with that key. An empty slot is denoted by 0,
	 * which cannot collide with a real entry, as a real entry always has a
	 * non-zero count. Values can only be positive, which we enforce during key
	 * addition.
*/
private long[] idHash;
/** {@code idHash.length == 1 << idHashBits}. */
private int idHashBits;
/**
* Create a new similarity index for the given object
*
* @param obj
* the object to hash
* @return similarity index for this object
* @throws java.io.IOException
* file contents cannot be read from the repository.
* @throws org.eclipse.jgit.diff.SimilarityIndex.TableFullException
* object hashing overflowed the storage capacity of the
* SimilarityIndex.
*/
public static SimilarityIndex create(ObjectLoader obj) throws IOException,
TableFullException {
SimilarityIndex idx = new SimilarityIndex();
idx.hash(obj);
idx.sort();
return idx;
}
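	// Typical use (illustrative sketch, not from the original source): build an
	// index for each side of a candidate rename pair and compare them. Here
	// "reader", "oldId" and "newId" are assumed to exist in the caller:
	//
	//   SimilarityIndex src = SimilarityIndex.create(reader.open(oldId));
	//   SimilarityIndex dst = SimilarityIndex.create(reader.open(newId));
	//   int score = src.score(dst, 100); // 100 == a perfect match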
SimilarityIndex() {
idHashBits = 8;
idHash = new long[1 << idHashBits];
idGrowAt = growAt(idHashBits);
}
static boolean isBinary(ObjectLoader obj) throws IOException {
if (obj.isLarge()) {
try (ObjectStream in1 = obj.openStream()) {
return RawText.isBinary(in1);
}
}
byte[] raw = obj.getCachedBytes();
return RawText.isBinary(raw, raw.length, true);
}
void hash(ObjectLoader obj) throws MissingObjectException, IOException,
TableFullException {
if (obj.isLarge()) {
hashLargeObject(obj);
} else {
byte[] raw = obj.getCachedBytes();
hash(raw, 0, raw.length);
}
}
private void hashLargeObject(ObjectLoader obj) throws IOException,
TableFullException {
boolean text;
text = !isBinary(obj);
try (ObjectStream in2 = obj.openStream()) {
hash(in2, in2.getSize(), text);
}
}
void hash(byte[] raw, int ptr, int end) throws TableFullException {
final boolean text = !RawText.isBinary(raw, raw.length, true);
hashedCnt = 0;
while (ptr < end) {
int hash = 5381;
int blockHashedCnt = 0;
int start = ptr;
// Hash one line, or one block, whichever occurs first.
do {
int c = raw[ptr++] & 0xff;
// Ignore CR in CRLF sequence if text
if (text && c == '\r' && ptr < end && raw[ptr] == '\n')
continue;
blockHashedCnt++;
if (c == '\n')
break;
hash = (hash << 5) + hash + c;
} while (ptr < end && ptr - start < 64);
hashedCnt += blockHashedCnt;
add(hash, blockHashedCnt);
}
}
void hash(InputStream in, long remaining, boolean text) throws IOException,
TableFullException {
byte[] buf = new byte[4096];
int ptr = 0;
int cnt = 0;
while (0 < remaining) {
int hash = 5381;
int blockHashedCnt = 0;
// Hash one line, or one block, whichever occurs first.
int n = 0;
do {
if (ptr == cnt) {
ptr = 0;
cnt = in.read(buf, 0, buf.length);
if (cnt <= 0)
throw new EOFException();
}
n++;
int c = buf[ptr++] & 0xff;
// Ignore CR in CRLF sequence if text
if (text && c == '\r' && ptr < cnt && buf[ptr] == '\n')
continue;
blockHashedCnt++;
if (c == '\n')
break;
hash = (hash << 5) + hash + c;
} while (n < 64 && n < remaining);
hashedCnt += blockHashedCnt;
add(hash, blockHashedCnt);
remaining -= n;
}
}
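	// Both hash(byte[], ...) and hash(InputStream, ...) above apply the same
	// djb2-style rolling hash to one line (or at most a 64-byte block) at a
	// time, so a large blob scored via streaming is indexed consistently with a
	// small blob scored from its cached bytes.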
/**
* Sort the internal table so it can be used for efficient scoring.
*
* Once sorted, additional lines/blocks cannot be added to the index.
*/
void sort() {
// Sort the array. All of the empty space will wind up at the front,
// because we forced all of the keys to always be positive. Later
// we only work with the back half of the array.
//
Arrays.sort(idHash);
}
/**
* Compute the similarity score between this index and another.
*
* A region of a file is defined as a line in a text file or a fixed-size
* block in a binary file. To prepare an index, each region in the file is
* hashed; the values and counts of hashes are retained in a sorted table.
* Define the similarity fraction F as the count of matching regions
	 * between the two files divided by the maximum count of regions in
* either file. The similarity score is F multiplied by the maxScore
* constant, yielding a range [0, maxScore]. It is defined as maxScore for
* the degenerate case of two empty files.
*
* The similarity score is symmetrical; i.e. a.score(b) == b.score(a).
*
* @param dst
* the other index
* @param maxScore
* the score representing a 100% match
* @return the similarity score
*/
public int score(SimilarityIndex dst, int maxScore) {
long max = Math.max(hashedCnt, dst.hashedCnt);
if (max == 0)
return maxScore;
return (int) ((common(dst) * maxScore) / max);
}
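	// Worked example (illustrative): if this index hashed 120 bytes, dst hashed
	// 100 bytes, and common(dst) reports 90 matching bytes, then
	// score(dst, 100) == (90 * 100) / 120 == 75, i.e. a 75% similarity score.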
long common(SimilarityIndex dst) {
return common(this, dst);
}
private static long common(SimilarityIndex src, SimilarityIndex dst) {
int srcIdx = src.packedIndex(0);
int dstIdx = dst.packedIndex(0);
long[] srcHash = src.idHash;
long[] dstHash = dst.idHash;
return common(srcHash, srcIdx, dstHash, dstIdx);
}
private static long common(long[] srcHash, int srcIdx, //
long[] dstHash, int dstIdx) {
if (srcIdx == srcHash.length || dstIdx == dstHash.length)
return 0;
long common = 0;
int srcKey = keyOf(srcHash[srcIdx]);
int dstKey = keyOf(dstHash[dstIdx]);
for (;;) {
if (srcKey == dstKey) {
common += Math.min(countOf(srcHash[srcIdx]),
countOf(dstHash[dstIdx]));
if (++srcIdx == srcHash.length)
break;
srcKey = keyOf(srcHash[srcIdx]);
if (++dstIdx == dstHash.length)
break;
dstKey = keyOf(dstHash[dstIdx]);
} else if (srcKey < dstKey) {
// Regions of src which do not appear in dst.
if (++srcIdx == srcHash.length)
break;
srcKey = keyOf(srcHash[srcIdx]);
} else /* if (dstKey < srcKey) */{
// Regions of dst which do not appear in src.
if (++dstIdx == dstHash.length)
break;
dstKey = keyOf(dstHash[dstIdx]);
}
}
return common;
}
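	// The loop above is a classic sorted-merge intersection: both idHash arrays
	// have been sorted by Arrays.sort(), keys live in the high bits, and the
	// side holding the smaller key is advanced, so the scan is linear in the
	// total number of occupied slots.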
// Testing only
int size() {
return idSize;
}
// Testing only
int key(int idx) {
return keyOf(idHash[packedIndex(idx)]);
}
// Testing only
long count(int idx) {
return countOf(idHash[packedIndex(idx)]);
}
// Brute force approach only for testing.
int findIndex(int key) {
for (int i = 0; i < idSize; i++)
if (key(i) == key)
return i;
return -1;
}
private int packedIndex(int idx) {
return (idHash.length - idSize) + idx;
}
void add(int key, int cnt) throws TableFullException {
key = (key * 0x9e370001) >>> 1; // Mix bits and ensure not negative.
int j = slot(key);
for (;;) {
long v = idHash[j];
if (v == 0) {
// Empty slot in the table, store here.
if (idGrowAt <= idSize) {
grow();
j = slot(key);
continue;
}
idHash[j] = pair(key, cnt);
idSize++;
return;
} else if (keyOf(v) == key) {
// Same key, increment the counter. If it overflows, fail
// indexing to prevent the key from being impacted.
//
idHash[j] = pair(key, countOf(v) + cnt);
return;
} else if (++j >= idHash.length) {
j = 0;
}
}
}
private static long pair(int key, long cnt) throws TableFullException {
if (MAX_COUNT < cnt)
throw new TableFullException();
return (((long) key) << KEY_SHIFT) | cnt;
}
private int slot(int key) {
// We use 31 - idHashBits because the upper bit was already forced
// to be 0 and we want the remaining high bits to be used as the
// table slot.
//
return key >>> (31 - idHashBits);
}
private static int growAt(int idHashBits) {
return (1 << idHashBits) * (idHashBits - 3) / idHashBits;
}
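	// For example, growAt(8) == 256 * (8 - 3) / 8 == 160, so the initial
	// 256-slot table is doubled once it is roughly 62% full.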
@SuppressWarnings("UnusedException")
private void grow() throws TableFullException {
if (idHashBits == 30)
throw new TableFullException();
long[] oldHash = idHash;
int oldSize = idHash.length;
idHashBits++;
idGrowAt = growAt(idHashBits);
try {
idHash = new long[1 << idHashBits];
} catch (OutOfMemoryError noMemory) {
throw TABLE_FULL_OUT_OF_MEMORY;
}
for (int i = 0; i < oldSize; i++) {
long v = oldHash[i];
if (v != 0) {
int j = slot(keyOf(v));
while (idHash[j] != 0)
if (++j >= idHash.length)
j = 0;
idHash[j] = v;
}
}
}
private static int keyOf(long v) {
return (int) (v >>> KEY_SHIFT);
}
private static long countOf(long v) {
return v & MAX_COUNT;
}
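	// Packing example (illustrative): pair(3, 5) == 0x0000000300000005L, and
	// keyOf/countOf recover 3 and 5 respectively; pair() throws
	// TableFullException once a count no longer fits in the low 32 bits.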
/** Thrown by {@code create()} when file is too large. */
public static class TableFullException extends Exception {
private static final long serialVersionUID = 1L;
}
}
// File: SimilarityRenameDetector.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import org.eclipse.jgit.api.errors.CanceledException;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ProgressMonitor;
class SimilarityRenameDetector {
/**
* Number of bits we need to express an index into src or dst list.
*
* This must be 28, giving us a limit of 2^28 entries in either list, which
	 * is an insane limit of 268,435,456 file names being considered in a single
* rename pass. The other 8 bits are used to store the score, while staying
* under 127 so the long doesn't go negative.
*/
private static final int BITS_PER_INDEX = 28;
private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1;
private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX;
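	// Bit budget (as described above): 8 score bits + 28 bits for one list
	// index + 28 bits for the other == 64 bits, with the score kept below 128
	// so the packed long never has its sign bit set.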
private ContentSource.Pair reader;
/**
* All sources to consider for copies or renames.
*
* A source is typically a {@link ChangeType#DELETE} change, but could be
* another type when trying to perform copy detection concurrently with
* rename detection.
*/
	private List<DiffEntry> srcs;

	/**
	 * All destinations to consider looking for a rename.
	 *
* A destination is typically an {@link ChangeType#ADD}, as the name has
* just come into existence, and we want to discover where its initial
* content came from.
*/
	private List<DiffEntry> dsts;

	/**
	 * Matrix of all examined file pairs, and their scores.
	 *
	 * The upper 8 bits of each long store the score, but the score is bounded
	 * to be in the range (0, 128) so that the highest bit is never set, and all
* entries are therefore positive.
*
* List indexes to an element of {@link #srcs} and {@link #dsts} are encoded
* as the lower two groups of 28 bits, respectively, but the encoding is
* inverted, so that 0 is expressed as {@code (1 << 28) - 1}. This sorts
* lower list indices later in the matrix, giving precedence to files whose
* names sort earlier in the tree.
*/
private long[] matrix;
/** Score a pair must exceed to be considered a rename. */
private int renameScore = 50;
/**
* File size threshold (in bytes) for detecting renames. Files larger
* than this size will not be processed for renames.
*/
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
/** Skip content renames for binary files. */
private boolean skipBinaryFiles = false;
/** Set if any {@link SimilarityIndex.TableFullException} occurs. */
private boolean tableOverflow;
	private List<DiffEntry> out;

	/**
	 * Get the old name associated with this file.
	 *
* @return old name for this file.
*/
public String getOldPath() {
return oldPath;
}
/**
* Get the new name associated with this file.
	 *
	 * For a deleted file the new name is <code>/dev/null</code>.
	 *
* @return new name for this file.
*/
public String getNewPath() {
return newPath;
}
/**
* Get the path associated with this file.
*
* @param side
* which path to obtain.
* @return name for this file.
*/
public String getPath(Side side) {
return side == Side.OLD ? getOldPath() : getNewPath();
}
/**
* Get diff attribute
*
* @return the {@link Attribute} determining filters to be applied.
* @since 4.11
*/
public Attribute getDiffAttribute() {
return diffAttribute;
}
/**
* Get the old file mode
*
* @return the old file mode, if described in the patch
*/
public FileMode getOldMode() {
return oldMode;
}
/**
* Get the new file mode
*
* @return the new file mode, if described in the patch
*/
public FileMode getNewMode() {
return newMode;
}
/**
* Get the mode associated with this file.
*
* @param side
* which mode to obtain.
* @return the mode.
*/
public FileMode getMode(Side side) {
return side == Side.OLD ? getOldMode() : getNewMode();
}
/**
* Get the change type
*
* @return the type of change this patch makes on {@link #getNewPath()}
*/
public ChangeType getChangeType() {
return changeType;
}
/**
* Get similarity score
*
* @return similarity score between {@link #getOldPath()} and
* {@link #getNewPath()} if {@link #getChangeType()} is
* {@link org.eclipse.jgit.diff.DiffEntry.ChangeType#COPY} or
* {@link org.eclipse.jgit.diff.DiffEntry.ChangeType#RENAME}.
*/
public int getScore() {
return score;
}
/**
	 * Get the old object id from the index.
*
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getOldId() {
return oldId;
}
/**
	 * Get the new object id from the index.
*
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getNewId() {
return newId;
}
/**
* Whether the mark tree filter with the specified index matched during scan
* or not, see {@link #scan(TreeWalk, boolean, TreeFilter...)}. Example:
*
*
* TreeFilter filterA = ...;
* TreeFilter filterB = ...;
* List<DiffEntry> entries = DiffEntry.scan(walk, false, filterA, filterB);
* DiffEntry entry = entries.get(0);
* boolean filterAMatched = entry.isMarked(0);
* boolean filterBMatched = entry.isMarked(1);
 *
 */

/**
 * A modified region detected between two versions of roughly the same content.
 *
 * An edit where {@code beginA == endA && beginB < endB} is an insert edit,
 * that is sequence B inserted the elements in region {@code [beginB, endB)}
 * at {@code beginA}.
 *
 * An edit where {@code beginA < endA && beginB == endB} is a delete edit,
 * that is sequence B has removed the elements between {@code [beginA, endA)}.
 *
 * An edit where {@code beginA < endA && beginB < endB} is a replace edit,
 * that is sequence B has replaced the range of elements between
 * {@code [beginA, endA)} with those found in {@code [beginB, endB)}.
 */
public class Edit {
/** Type of edit */
public enum Type {
/** Sequence B has inserted the region. */
INSERT,
/** Sequence B has removed the region. */
DELETE,
/** Sequence B has replaced the region with different content. */
REPLACE,
/** Sequence A and B have zero length, describing nothing. */
EMPTY;
}
int beginA;
int endA;
int beginB;
int endB;
/**
* Create a new empty edit.
*
* @param as
* beginA: start and end of region in sequence A; 0 based.
* @param bs
* beginB: start and end of region in sequence B; 0 based.
*/
public Edit(int as, int bs) {
this(as, as, bs, bs);
}
/**
* Create a new edit.
*
* @param as
* beginA: start of region in sequence A; 0 based.
* @param ae
* endA: end of region in sequence A; must be >= as.
* @param bs
* beginB: start of region in sequence B; 0 based.
* @param be
	 *            endB: end of region in sequence B; must be >= bs.
*/
public Edit(int as, int ae, int bs, int be) {
beginA = as;
endA = ae;
beginB = bs;
endB = be;
}
/**
* Get type
*
* @return the type of this region
*/
public final Type getType() {
if (beginA < endA) {
if (beginB < endB) {
return Type.REPLACE;
}
return Type.DELETE;
}
if (beginB < endB) {
return Type.INSERT;
}
		// beginB == endB
return Type.EMPTY;
}
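	// Examples: new Edit(1, 3, 5, 5) is a DELETE (A shrank, B unchanged),
	// new Edit(2, 2, 5, 7) is an INSERT, new Edit(1, 3, 5, 7) is a REPLACE,
	// and new Edit(4, 4) is EMPTY.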
/**
* Whether edit is empty
*
* @return {@code true} if the edit is empty (lengths of both a and b is
* zero)
*/
public final boolean isEmpty() {
return beginA == endA && beginB == endB;
}
/**
* Get start point in sequence A
*
* @return start point in sequence A
*/
public final int getBeginA() {
return beginA;
}
/**
* Get end point in sequence A
*
* @return end point in sequence A
*/
public final int getEndA() {
return endA;
}
/**
* Get start point in sequence B
*
* @return start point in sequence B
*/
public final int getBeginB() {
return beginB;
}
/**
* Get end point in sequence B
*
* @return end point in sequence B
*/
public final int getEndB() {
return endB;
}
/**
* Get length of the region in A
*
* @return length of the region in A
*/
public final int getLengthA() {
return endA - beginA;
}
/**
* Get length of the region in B
*
	 * @return length of the region in B
*/
public final int getLengthB() {
return endB - beginB;
}
/**
* Move the edit region by the specified amount.
*
* @param amount
* the region is shifted by this amount, and can be positive or
* negative.
* @since 4.8
*/
public final void shift(int amount) {
beginA += amount;
endA += amount;
beginB += amount;
endB += amount;
}
/**
* Construct a new edit representing the region before cut.
*
* @param cut
* the cut point. The beginning A and B points are used as the
* end points of the returned edit.
* @return an edit representing the slice of {@code this} edit that occurs
* before {@code cut} starts.
*/
public final Edit before(Edit cut) {
return new Edit(beginA, cut.beginA, beginB, cut.beginB);
}
/**
* Construct a new edit representing the region after cut.
*
* @param cut
* the cut point. The ending A and B points are used as the
* starting points of the returned edit.
* @return an edit representing the slice of {@code this} edit that occurs
* after {@code cut} ends.
*/
public final Edit after(Edit cut) {
return new Edit(cut.endA, endA, cut.endB, endB);
}
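	// Example: for this == Edit(10, 20, 30, 40) and cut == Edit(12, 14, 32, 34),
	// before(cut) yields Edit(10, 12, 30, 32) and after(cut) yields
	// Edit(14, 20, 34, 40).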
/**
* Increase {@link #getEndA()} by 1.
*/
public void extendA() {
endA++;
}
/**
* Increase {@link #getEndB()} by 1.
*/
public void extendB() {
endB++;
}
/**
* Swap A and B, so the edit goes the other direction.
*/
public void swap() {
final int sBegin = beginA;
final int sEnd = endA;
beginA = beginB;
endA = endB;
beginB = sBegin;
endB = sEnd;
}
@Override
public int hashCode() {
return beginA ^ endA;
}
@Override
public boolean equals(Object o) {
if (o instanceof Edit) {
final Edit e = (Edit) o;
return this.beginA == e.beginA && this.endA == e.endA
&& this.beginB == e.beginB && this.endB == e.endB;
}
return false;
}
@SuppressWarnings("nls")
@Override
public String toString() {
final Type t = getType();
return t + "(" + beginA + "-" + endA + "," + beginB + "-" + endB + ")";
}
}
// File: EditList.java
/*
* Copyright (C) 2009, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import java.util.ArrayList;
/**
* Specialized list of {@link org.eclipse.jgit.diff.Edit}s in a document.
*/
public class EditList extends ArrayList<Edit>

/**
 * @param <S>
 *            the base sequence type.
 */
public final class HashedSequence<S extends Sequence> extends Sequence {
final S base;
final int[] hashes;
HashedSequence(S base, int[] hashes) {
this.base = base;
this.hashes = hashes;
}
@Override
public int size() {
return base.size();
}
}
// File: HashedSequenceComparator.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Wrap another comparator for use with
* {@link org.eclipse.jgit.diff.HashedSequence}.
*
 * @param <S>
 *            the base sequence type.
 */
public final class HashedSequenceComparator<S extends Sequence> extends
		SequenceComparator<HashedSequence<S>> {
	private final SequenceComparator<? super S> cmp;
	@Override
	public boolean equals(HashedSequence<S> a, int ai, //
			HashedSequence<S> b, int bi) {
return a.hashes[ai] == b.hashes[bi]
&& cmp.equals(a.base, ai, b.base, bi);
}
@Override
	public int hash(HashedSequence<S> seq, int ptr) {
return seq.hashes[ptr];
}
}
// File: HashedSequencePair.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Wraps two {@link org.eclipse.jgit.diff.Sequence} instances to cache their
* element hash codes.
 *
 * @param <S>
 *            the base sequence type.
 */
public class HashedSequencePair<S extends Sequence> {
	private final SequenceComparator<? super S> cmp;
	private final S baseA;
	private final S baseB;
	private HashedSequence<S> cachedA;
	private HashedSequence<S> cachedB;
/**
* Construct a pair to provide fast hash codes.
*
* @param cmp
* the base comparator for the sequence elements.
* @param a
* the A sequence.
* @param b
* the B sequence.
*/
	public HashedSequencePair(SequenceComparator<? super S> cmp, S a, S b) {
this.cmp = cmp;
this.baseA = a;
this.baseB = b;
}
/**
* Get comparator
*
* @return obtain a comparator that uses the cached hash codes
*/
	public HashedSequenceComparator<S> getComparator() {
return new HashedSequenceComparator<>(cmp);
}
/**
* Get A
*
* @return wrapper around A that includes cached hash codes
*/
	public HashedSequence<S> getA() {
if (cachedA == null)
cachedA = wrap(baseA);
return cachedA;
}
/**
* Get B
*
* @return wrapper around B that includes cached hash codes
*/
	public HashedSequence<S> getB() {
if (cachedB == null)
cachedB = wrap(baseB);
return cachedB;
}
	private HashedSequence<S> wrap(S base) {
final int end = base.size();
final int[] hashes = new int[end];
for (int ptr = 0; ptr < end; ptr++)
hashes[ptr] = cmp.hash(base, ptr);
return new HashedSequence<>(base, hashes);
}
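	// wrap() pays the hashing cost exactly once per element; the diff algorithm
	// can then reject most non-matches by comparing the cached ints in
	// HashedSequenceComparator before falling back to the underlying equals().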
}
// File: HistogramDiff.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import java.util.ArrayList;
import java.util.List;
/**
* An extended form of Bram Cohen's patience diff algorithm.
 */
public class HistogramDiff extends LowLevelDiffAlgorithm {
	@Override
	public <S extends Sequence> void diffNonCommon(EditList edits,
			HashedSequenceComparator<S> cmp, HashedSequence<S> a,
			HashedSequence<S> b, Edit region) {
new State<>(edits, cmp, a, b).diffRegion(region);
}
	private class State<S extends Sequence> {
		private final HashedSequenceComparator<S> cmp;
		private final HashedSequence<S> a;
		private final HashedSequence<S> b;
		private final List<Edit> queue = new ArrayList<>();
		final EditList edits;
		State(EditList edits, HashedSequenceComparator<S> cmp,
				HashedSequence<S> a, HashedSequence<S> b) {
this.cmp = cmp;
this.a = a;
this.b = b;
this.edits = edits;
}
void diffRegion(Edit r) {
diffReplace(r);
while (!queue.isEmpty())
diff(queue.remove(queue.size() - 1));
}
private void diffReplace(Edit r) {
Edit lcs = new HistogramDiffIndex<>(maxChainLength, cmp, a, b, r)
.findLongestCommonSequence();
if (lcs != null) {
// If we were given an edit, we can prove a result here.
//
if (lcs.isEmpty()) {
// An empty edit indicates there is nothing in common.
// Replace the entire region.
//
edits.add(r);
} else {
queue.add(r.after(lcs));
queue.add(r.before(lcs));
}
} else if (fallback instanceof LowLevelDiffAlgorithm) {
LowLevelDiffAlgorithm fb = (LowLevelDiffAlgorithm) fallback;
fb.diffNonCommon(edits, cmp, a, b, r);
} else if (fallback != null) {
				SubsequenceComparator<HashedSequence<S>> cs = new SubsequenceComparator<>(cmp);

/**
 * @param <S>
 *            type of the base sequence.
 */
final class HistogramDiffIndex<S extends Sequence> {
private static final int REC_NEXT_SHIFT = 28 + 8;
private static final int REC_PTR_SHIFT = 8;
private static final int REC_PTR_MASK = (1 << 28) - 1;
private static final int REC_CNT_MASK = (1 << 8) - 1;
private static final int MAX_PTR = REC_PTR_MASK;
private static final int MAX_CNT = (1 << 8) - 1;
private final int maxChainLength;
	private final HashedSequenceComparator<S> cmp;
	private final HashedSequence<S> a;
	private final HashedSequence<S> b;
private final Edit region;
/** Keyed by {@link #hash(HashedSequence, int)} for {@link #recs} index. */
private final int[] table;
/** Number of low bits to discard from a key to index {@link #table}. */
private final int keyShift;
/**
* Describes a unique element in sequence A.
*
	 * The records in this table are actually 3-tuples of:
	 *
	 * 1. index of the next record in this table that has the same hash code
	 * 2. index of the first element of this occurrence chain within sequence A
	 * 3. number of occurrences of this element within the region of A
	 *
* The occurrence count is capped at {@link #MAX_CNT}, as the field is only
* a few bits wide. Elements that occur more frequently will have their
* count capped.
*/
private long[] recs;
/** Number of elements in {@link #recs}; also is the unique element count. */
private int recCnt;
/**
* For {@code ptr}, {@code next[ptr - ptrShift]} has subsequent index.
*
* For the sequence element {@code ptr}, the value stored at location
* {@code next[ptr - ptrShift]} is the next occurrence of the exact same
* element in the sequence.
*
* Chains always run from the lowest index to the largest index. Therefore
* the array will store {@code next[1] = 2}, but never {@code next[2] = 1}.
* This allows a chain to terminate with {@code 0}, as {@code 0} would never
* be a valid next element.
*
* The array is sized to be {@code region.getLengthA()} and element indexes
* are converted to array indexes by subtracting {@link #ptrShift}, which is
* just a cached version of {@code region.beginA}.
*/
private int[] next;
/**
* For element {@code ptr} in A, index of the record in {@link #recs} array.
*
* The record at {@code recs[recIdx[ptr - ptrShift]]} is the record
* describing all occurrences of the element appearing in sequence A at
* position {@code ptr}. The record is needed to get the occurrence count of
* the element, or to locate all other occurrences of that element within
* sequence A. This index provides constant-time access to the record, and
* avoids needing to scan the hash chain.
*/
private int[] recIdx;
/** Value to subtract from element indexes to key {@link #next} array. */
private int ptrShift;
private Edit lcs;
private int cnt;
private boolean hasCommon;
	HistogramDiffIndex(int maxChainLength, HashedSequenceComparator<S> cmp,
			HashedSequence<S> a, HashedSequence<S> b, Edit r) {
this.maxChainLength = maxChainLength;
this.cmp = cmp;
this.a = a;
this.b = b;
this.region = r;
if (region.endA >= MAX_PTR)
throw new IllegalArgumentException(
JGitText.get().sequenceTooLargeForDiffAlgorithm);
final int sz = r.getLengthA();
final int tableBits = tableBits(sz);
table = new int[1 << tableBits];
keyShift = 32 - tableBits;
ptrShift = r.beginA;
recs = new long[Math.max(4, sz >>> 3)];
next = new int[sz];
recIdx = new int[sz];
}
Edit findLongestCommonSequence() {
if (!scanA())
return null;
lcs = new Edit(0, 0);
cnt = maxChainLength + 1;
for (int bPtr = region.beginB; bPtr < region.endB;)
bPtr = tryLongestCommonSequence(bPtr);
return hasCommon && maxChainLength < cnt ? null : lcs;
}
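	// A null result above tells HistogramDiff to fall back to another
	// algorithm: there is common content, but every match found involved
	// elements occurring too frequently (beyond maxChainLength) for the
	// histogram heuristic to pick a safe split point.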
private boolean scanA() {
// Scan the elements backwards, inserting them into the hash table
// as we go. Going in reverse places the earliest occurrence of any
// element at the start of the chain, so we consider earlier matches
// before later matches.
//
SCAN: for (int ptr = region.endA - 1; region.beginA <= ptr; ptr--) {
final int tIdx = hash(a, ptr);
int chainLen = 0;
for (int rIdx = table[tIdx]; rIdx != 0;) {
final long rec = recs[rIdx];
if (cmp.equals(a, recPtr(rec), a, ptr)) {
// ptr is identical to another element. Insert it onto
// the front of the existing element chain.
//
int newCnt = recCnt(rec) + 1;
if (MAX_CNT < newCnt)
newCnt = MAX_CNT;
recs[rIdx] = recCreate(recNext(rec), ptr, newCnt);
next[ptr - ptrShift] = recPtr(rec);
recIdx[ptr - ptrShift] = rIdx;
continue SCAN;
}
rIdx = recNext(rec);
chainLen++;
}
if (chainLen == maxChainLength)
return false;
// This is the first time we have ever seen this particular
// element in the sequence. Construct a new chain for it.
//
final int rIdx = ++recCnt;
if (rIdx == recs.length) {
int sz = Math.min(recs.length << 1, 1 + region.getLengthA());
long[] n = new long[sz];
System.arraycopy(recs, 0, n, 0, recs.length);
recs = n;
}
recs[rIdx] = recCreate(table[tIdx], ptr, 1);
recIdx[ptr - ptrShift] = rIdx;
table[tIdx] = rIdx;
}
return true;
}
private int tryLongestCommonSequence(int bPtr) {
int bNext = bPtr + 1;
int rIdx = table[hash(b, bPtr)];
for (long rec; rIdx != 0; rIdx = recNext(rec)) {
rec = recs[rIdx];
// If there are more occurrences in A, don't use this chain.
if (recCnt(rec) > cnt) {
if (!hasCommon)
hasCommon = cmp.equals(a, recPtr(rec), b, bPtr);
continue;
}
int as = recPtr(rec);
if (!cmp.equals(a, as, b, bPtr))
continue;
hasCommon = true;
TRY_LOCATIONS: for (;;) {
int np = next[as - ptrShift];
int bs = bPtr;
int ae = as + 1;
int be = bs + 1;
int rc = recCnt(rec);
while (region.beginA < as && region.beginB < bs
&& cmp.equals(a, as - 1, b, bs - 1)) {
as--;
bs--;
if (1 < rc)
rc = Math.min(rc, recCnt(recs[recIdx[as - ptrShift]]));
}
while (ae < region.endA && be < region.endB
&& cmp.equals(a, ae, b, be)) {
if (1 < rc)
rc = Math.min(rc, recCnt(recs[recIdx[ae - ptrShift]]));
ae++;
be++;
}
if (bNext < be)
bNext = be;
if (lcs.getLengthA() < ae - as || rc < cnt) {
// If this region is the longest, or there are less
// occurrences of it in A, its now our LCS.
//
lcs.beginA = as;
lcs.beginB = bs;
lcs.endA = ae;
lcs.endB = be;
cnt = rc;
}
// Because we added elements in reverse order index 0
// cannot possibly be the next position. Its the first
// element of the sequence and thus would have been the
// value of as at the start of the TRY_LOCATIONS loop.
//
if (np == 0)
break TRY_LOCATIONS;
while (np < ae) {
// The next location to consider was actually within
// the LCS we examined above. Don't reconsider it.
//
np = next[np - ptrShift];
if (np == 0)
break TRY_LOCATIONS;
}
as = np;
}
}
return bNext;
}
	private int hash(HashedSequence<S> s, int idx) {
return (cmp.hash(s, idx) * 0x9e370001 /* mix bits */) >>> keyShift;
}
private static long recCreate(int next, int ptr, int cnt) {
return ((long) next << REC_NEXT_SHIFT) //
| ((long) ptr << REC_PTR_SHIFT) //
| cnt;
}
private static int recNext(long rec) {
return (int) (rec >>> REC_NEXT_SHIFT);
}
private static int recPtr(long rec) {
return ((int) (rec >>> REC_PTR_SHIFT)) & REC_PTR_MASK;
}
private static int recCnt(long rec) {
return ((int) rec) & REC_CNT_MASK;
}
private static int tableBits(int sz) {
int bits = 31 - Integer.numberOfLeadingZeros(sz);
if (bits == 0)
bits = 1;
if (1 << bits < sz)
bits++;
return bits;
}
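	// Example: tableBits(100) == 7, because 2^6 == 64 is still smaller than the
	// region size while 2^7 == 128 is not, giving a table at least as large as
	// the number of elements being indexed.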
}
// File: LowLevelDiffAlgorithm.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Compares two sequences primarily based upon hash codes.
*/
public abstract class LowLevelDiffAlgorithm extends DiffAlgorithm {
@Override
	public <S extends Sequence> EditList diffNonCommon(
			SequenceComparator<? super S> cmp, S a, S b) {
		HashedSequencePair<S> p = new HashedSequencePair<>(cmp, a, b);
		HashedSequenceComparator<S> hc = p.getComparator();
		HashedSequence<S> ha = p.getA();
		HashedSequence<S> hb = p.getB();
p = null;
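		// Presumably nulled so the pair wrapper can be garbage collected early;
		// only hc, ha and hb are used from here on.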
EditList res = new EditList();
Edit region = new Edit(0, a.size(), 0, b.size());
diffNonCommon(res, hc, ha, hb, region);
return res;
}
/**
* Compare two sequences and identify a list of edits between them.
*
* This method should be invoked only after the two sequences have been
* proven to have no common starting or ending elements. The expected
* elimination of common starting and ending elements is automatically
* performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
* method, which invokes this method using
* {@link org.eclipse.jgit.diff.Subsequence}s.
*
	 * @param <S>
* type of Sequence compared
* @param edits
* result list to append the region's edits onto.
* @param cmp
* the comparator supplying the element equivalence function.
* @param a
* the first (also known as old or pre-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
* {@link org.eclipse.jgit.diff.Edit#getEndA()}.
* @param b
* the second (also known as new or post-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
* {@link org.eclipse.jgit.diff.Edit#getEndB()}.
* @param region
* the region being compared within the two sequences.
*/
	public abstract <S extends Sequence> void diffNonCommon(EditList edits,
			HashedSequenceComparator<S> cmp, HashedSequence<S> a,
			HashedSequence<S> b, Edit region);
}
// File: MyersDiff.java
/*
 * Copyright (C) 2008-2009, Johannes E. Schindelin and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
package org.eclipse.jgit.diff;
/**
 * Diff algorithm, based on "An O(ND) Difference Algorithm and its Variations",
 * by Eugene Myers.
 *
 * The total cost over the recursive bisection of the problem works out to
 * O(N * D^2 + 2 * N/2 * (D/2)^2 + 4 * N/4 * (D/4)^2 + ...)
 * = O(N * D^2 * 5 / 4) = O(N * D^2).
 *
 * @param <S>
 *            type of sequence.
*/
@SuppressWarnings("hiding")
public class MyersDiff<S extends Sequence> {
/** Singleton instance of MyersDiff. */
public static final DiffAlgorithm INSTANCE = new LowLevelDiffAlgorithm() {
@SuppressWarnings("unused")
@Override
		public <S extends Sequence> void diffNonCommon(EditList edits,
				HashedSequenceComparator<S> cmp, HashedSequence<S> a,
				HashedSequence<S> b, Edit region) {
new MyersDiff<>(edits, cmp, a, b, region);
}
};
/**
* The list of edits found during the last call to
* {@link #calculateEdits(Edit)}
*/
protected EditList edits;
/** Comparison function for sequences. */
	protected HashedSequenceComparator<S> cmp;
/**
* The first text to be compared. Referred to as "Text A" in the comments
*/
	protected HashedSequence<S> a;
/**
* The second text to be compared. Referred to as "Text B" in the comments
*/
	protected HashedSequence<S> b;
	private MyersDiff(EditList edits, HashedSequenceComparator<S> cmp,
			HashedSequence<S> a, HashedSequence<S> b, Edit region) {
this.edits = edits;
this.cmp = cmp;
this.a = a;
this.b = b;
calculateEdits(region);
}
// TODO: use ThreadLocal for future multi-threaded operations
MiddleEdit middle = new MiddleEdit();
/**
	 * Entry point into the algorithm this class is all about. This method
	 * triggers the calculation of the differences between A and B, expressed
	 * as a list of edits.
* @param r portion of the sequences to examine.
*/
private void calculateEdits(Edit r) {
middle.initialize(r.beginA, r.endA, r.beginB, r.endB);
if (middle.beginA >= middle.endA &&
middle.beginB >= middle.endB)
return;
calculateEdits(middle.beginA, middle.endA,
middle.beginB, middle.endB);
}
/**
* Calculates the differences between a given part of A against another
* given part of B
*
* @param beginA
* start of the part of A which should be compared
* (0<=beginA<sizeof(A))
* @param endA
* end of the part of A which should be compared
* (beginA<=endA<sizeof(A))
* @param beginB
* start of the part of B which should be compared
* (0<=beginB<sizeof(B))
* @param endB
* end of the part of B which should be compared
* (beginB<=endB<sizeof(B))
*/
protected void calculateEdits(int beginA, int endA,
int beginB, int endB) {
Edit edit = middle.calculate(beginA, endA, beginB, endB);
if (beginA < edit.beginA || beginB < edit.beginB) {
int k = edit.beginB - edit.beginA;
int x = middle.backward.snake(k, edit.beginA);
calculateEdits(beginA, x, beginB, k + x);
}
if (edit.getType() != Edit.Type.EMPTY)
edits.add(edits.size(), edit);
// after middle
if (endA > edit.endA || endB > edit.endB) {
int k = edit.endB - edit.endA;
int x = middle.forward.snake(k, edit.endA);
calculateEdits(x, endA, k + x, endB);
}
}
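	// The recursion above is the divide and conquer shape of Myers' bisection
	// approach: find the middle edit, recurse on the region before it, record
	// the edit, then recurse on the region after it.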
/**
* A class to help bisecting the sequences a and b to find minimal
* edit paths.
*
* As the arrays are reused for space efficiency, you will need one
* instance per thread.
*
* The entry function is the calculate() method.
*/
class MiddleEdit {
void initialize(int beginA, int endA, int beginB, int endB) {
this.beginA = beginA; this.endA = endA;
this.beginB = beginB; this.endB = endB;
// strip common parts on either end
int k = beginB - beginA;
this.beginA = forward.snake(k, beginA);
this.beginB = k + this.beginA;
k = endB - endA;
this.endA = backward.snake(k, endA);
this.endB = k + this.endA;
}
/*
* This function calculates the "middle" Edit of the shortest
* edit path between the given subsequences of a and b.
*
* Once a forward path and a backward path meet, we found the
* middle part. From the last snake end point on both of them,
* we construct the Edit.
*
* It is assumed that there is at least one edit in the range.
*/
// TODO: measure speed impact when this is synchronized
Edit calculate(int beginA, int endA, int beginB, int endB) {
if (beginA == endA || beginB == endB)
return new Edit(beginA, endA, beginB, endB);
this.beginA = beginA; this.endA = endA;
this.beginB = beginB; this.endB = endB;
/*
* Following the conventions in Myers' paper, "k" is
* the difference between the index into "b" and the
* index into "a".
*/
int minK = beginB - endA;
int maxK = endB - beginA;
forward.initialize(beginB - beginA, beginA, minK, maxK);
backward.initialize(endB - endA, endA, minK, maxK);
for (int d = 1; ; d++)
if (forward.calculate(d) ||
backward.calculate(d))
return edit;
}
/*
* For each d, we need to hold the d-paths for the diagonals
* k = -d, -d + 2, ..., d - 2, d. These are stored in the
* forward (and backward) array.
*
* As we allow subsequences, too, this needs some refinement:
* the forward paths start on the diagonal forwardK =
* beginB - beginA, and backward paths start on the diagonal
* backwardK = endB - endA.
*
* So, we need to hold the forward d-paths for the diagonals
* k = forwardK - d, forwardK - d + 2, ..., forwardK + d and
* the analogue for the backward d-paths. This means that
* we can turn (k, d) into the forward array index using this
* formula:
*
* i = (d + k - forwardK) / 2
*
* There is a further complication: the edit paths should not
* leave the specified subsequences, so k is bounded by
* minK = beginB - endA and maxK = endB - beginA. However,
* (k - forwardK) _must_ be odd whenever d is odd, and it
* _must_ be even when d is even.
*
* The values in the "forward" and "backward" arrays are
* positions ("x") in the sequence a, to get the corresponding
* positions ("y") in the sequence b, you have to calculate
* the appropriate k and then y:
*
* k = forwardK - d + i * 2
* y = k + x
*
* (substitute backwardK for forwardK if you want to get the
* y position for an entry in the "backward" array.
*/
EditPaths forward = new ForwardEditPaths();
EditPaths backward = new BackwardEditPaths();
/* Some variables which are shared between methods */
protected int beginA, endA, beginB, endB;
protected Edit edit;
abstract class EditPaths {
private IntList x = new IntList();
private LongList snake = new LongList();
int beginK, endK, middleK;
int prevBeginK, prevEndK;
/* if we hit one end early, no need to look further */
int minK, maxK; // TODO: better explanation
final int getIndex(int d, int k) {
// TODO: remove
if (((d + k - middleK) % 2) != 0)
throw new RuntimeException(MessageFormat.format(JGitText.get().unexpectedOddResult, Integer.valueOf(d), Integer.valueOf(k), Integer.valueOf(middleK)));
return (d + k - middleK) / 2;
}
final int getX(int d, int k) {
// TODO: remove
if (k < beginK || k > endK)
throw new RuntimeException(MessageFormat.format(JGitText.get().kNotInRange, Integer.valueOf(k), Integer.valueOf(beginK), Integer.valueOf(endK)));
return x.get(getIndex(d, k));
}
final long getSnake(int d, int k) {
// TODO: remove
if (k < beginK || k > endK)
throw new RuntimeException(MessageFormat.format(JGitText.get().kNotInRange, Integer.valueOf(k), Integer.valueOf(beginK), Integer.valueOf(endK)));
return snake.get(getIndex(d, k));
}
private int forceKIntoRange(int k) {
/* if k is odd, so must be the result */
if (k < minK)
return minK + ((k ^ minK) & 1);
else if (k > maxK)
return maxK - ((k ^ maxK) & 1);
return k;
}
void initialize(int k, int x, int minK, int maxK) {
this.minK = minK;
this.maxK = maxK;
beginK = endK = middleK = k;
this.x.clear();
this.x.add(x);
snake.clear();
snake.add(newSnake(k, x));
}
abstract int snake(int k, int x);
abstract int getLeft(int x);
abstract int getRight(int x);
abstract boolean isBetter(int left, int right);
abstract void adjustMinMaxK(int k, int x);
abstract boolean meets(int d, int k, int x, long snake);
final long newSnake(int k, int x) {
long y = (long) k + x;
long ret = ((long) x) << 32;
return ret | y;
}
final int snake2x(long snake) {
return (int) (snake >>> 32);
}
final int snake2y(long snake) {
return (int) snake;
}
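		// Packing example: newSnake(2, 5) records x == 5 in the upper 32 bits
		// and y == k + x == 7 in the lower 32 bits, so snake2x() == 5 and
		// snake2y() == 7.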
final boolean makeEdit(long snake1, long snake2) {
int x1 = snake2x(snake1), x2 = snake2x(snake2);
int y1 = snake2y(snake1), y2 = snake2y(snake2);
/*
* Check for incompatible partial edit paths:
* when there are ambiguities, we might have
* hit incompatible (i.e. non-overlapping)
* forward/backward paths.
*
* In that case, just pretend that we have
* an empty edit at the end of one snake; this
* will force a decision which path to take
* in the next recursion step.
*/
if (x1 > x2 || y1 > y2) {
x1 = x2;
y1 = y2;
}
edit = new Edit(x1, x2, y1, y2);
return true;
}
boolean calculate(int d) {
prevBeginK = beginK;
prevEndK = endK;
beginK = forceKIntoRange(middleK - d);
endK = forceKIntoRange(middleK + d);
// TODO: handle i more efficiently
// TODO: walk snake(k, getX(d, k)) only once per (d, k)
// TODO: move end points out of the loop to avoid conditionals inside the loop
// go backwards so that we can avoid temp vars
for (int k = endK; k >= beginK; k -= 2) {
if (Thread.interrupted()) {
throw new DiffInterruptedException();
}
int left = -1, right = -1;
long leftSnake = -1L, rightSnake = -1L;
// TODO: refactor into its own function
if (k > prevBeginK) {
int i = getIndex(d - 1, k - 1);
left = x.get(i);
int end = snake(k - 1, left);
leftSnake = left != end ?
newSnake(k - 1, end) :
snake.get(i);
if (meets(d, k - 1, end, leftSnake))
return true;
left = getLeft(end);
}
if (k < prevEndK) {
int i = getIndex(d - 1, k + 1);
right = x.get(i);
int end = snake(k + 1, right);
rightSnake = right != end ?
newSnake(k + 1, end) :
snake.get(i);
if (meets(d, k + 1, end, rightSnake))
return true;
right = getRight(end);
}
int newX;
long newSnake;
if (k >= prevEndK ||
(k > prevBeginK &&
isBetter(left, right))) {
newX = left;
newSnake = leftSnake;
}
else {
newX = right;
newSnake = rightSnake;
}
if (meets(d, k, newX, newSnake))
return true;
adjustMinMaxK(k, newX);
int i = getIndex(d, k);
x.set(i, newX);
snake.set(i, newSnake);
}
return false;
}
}
class ForwardEditPaths extends EditPaths {
@Override
final int snake(int k, int x) {
for (; x < endA && k + x < endB; x++)
if (!cmp.equals(a, x, b, k + x))
break;
return x;
}
@Override
final int getLeft(int x) {
return x;
}
@Override
final int getRight(int x) {
return x + 1;
}
@Override
final boolean isBetter(int left, int right) {
return left > right;
}
@Override
final void adjustMinMaxK(int k, int x) {
if (x >= endA || k + x >= endB) {
if (k > backward.middleK)
maxK = k;
else
minK = k;
}
}
@Override
final boolean meets(int d, int k, int x, long snake) {
if (k < backward.beginK || k > backward.endK)
return false;
// TODO: move out of loop
if (((d - 1 + k - backward.middleK) % 2) != 0)
return false;
if (x < backward.getX(d - 1, k))
return false;
makeEdit(snake, backward.getSnake(d - 1, k));
return true;
}
}
class BackwardEditPaths extends EditPaths {
@Override
final int snake(int k, int x) {
for (; x > beginA && k + x > beginB; x--)
if (!cmp.equals(a, x - 1, b, k + x - 1))
break;
return x;
}
@Override
final int getLeft(int x) {
return x - 1;
}
@Override
final int getRight(int x) {
return x;
}
@Override
final boolean isBetter(int left, int right) {
return left < right;
}
@Override
final void adjustMinMaxK(int k, int x) {
if (x <= beginA || k + x <= beginB) {
if (k > forward.middleK)
maxK = k;
else
minK = k;
}
}
@Override
final boolean meets(int d, int k, int x, long snake) {
if (k < forward.beginK || k > forward.endK)
return false;
// TODO: move out of loop
if (((d + k - forward.middleK) % 2) != 0)
return false;
if (x > forward.getX(d, k))
return false;
makeEdit(forward.getSnake(d, k), snake);
return true;
}
}
}
/**
* Main method
*
* @param args
* two filenames specifying the contents to be diffed
*/
public static void main(String[] args) {
if (args.length != 2) {
err().println(JGitText.get().need2Arguments);
System.exit(1);
}
try {
RawText a = new RawText(new java.io.File(args[0]));
RawText b = new RawText(new java.io.File(args[1]));
EditList r = INSTANCE.diff(RawTextComparator.DEFAULT, a, b);
System.out.println(r.toString());
} catch (Exception e) {
PrintWriter err = err();
err.println(e.getMessage());
e.printStackTrace(err);
}
}
private static PrintWriter err() {
return new PrintWriter(new OutputStreamWriter(System.err, UTF_8));
}
}
// File: PatchIdDiffFormatter.java
/*
 * Copyright (C) 2011, Stefan Lay and others
 */

/**
 * Get the line delimiter for the first line.
 *
 * @return the line delimiter or {@code null}
 */
public String getLineDelimiter() {
if (size() == 0) {
return null;
}
int e = getEnd(0);
if (content[e - 1] != '\n') {
return null;
}
if (content.length > 1 && e > 1 && content[e - 2] == '\r') {
return "\r\n"; //$NON-NLS-1$
}
return "\n"; //$NON-NLS-1$
}
/**
* Read a blob object into RawText, or throw BinaryBlobException if the blob
* is binary.
*
* @param ldr
* the ObjectLoader for the blob
* @param threshold
* if the blob is larger than this size, it is always assumed to
* be binary.
* @since 4.10
* @return the RawText representing the blob.
* @throws org.eclipse.jgit.errors.BinaryBlobException
* if the blob contains binary data.
* @throws java.io.IOException
* if the input could not be read.
*/
public static RawText load(ObjectLoader ldr, int threshold)
throws IOException, BinaryBlobException {
long sz = ldr.getSize();
if (sz > threshold) {
throw new BinaryBlobException();
}
int bufferSize = getBufferSize();
if (sz <= bufferSize) {
byte[] data = ldr.getCachedBytes(bufferSize);
if (isBinary(data, data.length, true)) {
throw new BinaryBlobException();
}
return new RawText(data);
}
byte[] head = new byte[bufferSize];
try (InputStream stream = ldr.openStream()) {
int off = 0;
int left = head.length;
byte last = 'x'; // Just something inconspicuous
while (left > 0) {
int n = stream.read(head, off, left);
if (n < 0) {
throw new EOFException();
}
left -= n;
while (n > 0) {
byte curr = head[off];
if (isBinary(curr, last)) {
throw new BinaryBlobException();
}
last = curr;
off++;
n--;
}
}
byte[] data;
try {
data = new byte[(int)sz];
} catch (OutOfMemoryError e) {
throw new LargeObjectException.OutOfMemory(e);
}
System.arraycopy(head, 0, data, 0, head.length);
IO.readFully(stream, data, off, (int) (sz-off));
return new RawText(data, RawParseUtils.lineMapOrBinary(data, 0, (int) sz));
}
}
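	// Two paths above: small blobs are materialized via getCachedBytes() and
	// checked with the byte-array isBinary(); larger blobs are streamed, with
	// the binary check applied byte-by-byte to the first buffer before the rest
	// of the stream is read into memory.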
}
// File: RawTextComparator.java
/*
* Copyright (C) 2009-2010, Google Inc.
 * Copyright (C) 2008-2009, Johannes E. Schindelin and others
 */

/**
 * Compute a hash code for the region {@code [ptr, end)} of raw.
 */
protected abstract int hashRegion(byte[] raw, int ptr, int end);
}
// File: RenameDetector.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import org.eclipse.jgit.api.errors.CanceledException;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
/**
* Detect and resolve object renames.
*/
public class RenameDetector {
private static final int EXACT_RENAME_SCORE = 100;
private static final Comparator> nonUniqueAdds = new ArrayList<>();
for (Object o : addedMap.values()) {
if (o instanceof DiffEntry)
uniqueAdds.add((DiffEntry) o);
else
				nonUniqueAdds.add((List<DiffEntry>) o);

/**
 * @param <S>
 *            type of sequence the comparator supports.
 */
public abstract class SequenceComparator<S extends Sequence> {
/**
* Compare two items to determine if they are equivalent.
*
* It is permissible to compare sequence {@code a} with itself (by passing
* {@code a} again in position {@code b}).
*
* @param a
* the first sequence.
* @param ai
	 *            item of {@code a} to compare.
* @param b
* the second sequence.
* @param bi
	 *            item of {@code b} to compare.
* @return true if the two items are identical according to this function's
* equivalence rule.
*/
public abstract boolean equals(S a, int ai, S b, int bi);
/**
* Get a hash value for an item in a sequence.
*
* If two items are equal according to this comparator's
* {@link #equals(Sequence, int, Sequence, int)} method, then this hash
* method must produce the same integer result for both items.
*
* It is not required for two items to have different hash values if they
* are unequal according to the {@code equals()} method.
*
* @param seq
* the sequence.
* @param ptr
* the item to obtain the hash for.
	 * @return the hash value.
*/
public abstract int hash(S seq, int ptr);
/**
* Modify the edit to remove common leading and trailing items.
*
* The supplied edit {@code e} is reduced in size by moving the beginning A
* and B points so the edit does not cover any items that are in common
* between the two sequences. The ending A and B points are also shifted to
* remove common items from the end of the region.
*
* @param a
* the first sequence.
* @param b
* the second sequence.
* @param e
* the edit to start with and update.
* @return {@code e} if it was updated in-place, otherwise a new edit
* containing the reduced region.
*/
public Edit reduceCommonStartEnd(S a, S b, Edit e) {
// Skip over items that are common at the start.
//
while (e.beginA < e.endA && e.beginB < e.endB
&& equals(a, e.beginA, b, e.beginB)) {
e.beginA++;
e.beginB++;
}
// Skip over items that are common at the end.
//
while (e.beginA < e.endA && e.beginB < e.endB
&& equals(a, e.endA - 1, b, e.endB - 1)) {
e.endA--;
e.endB--;
}
return e;
}
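	// Example: for sequences a = [x, y, z, w] and b = [x, q, z, w], reducing an
	// initial Edit(0, 4, 0, 4) trims the shared "x" prefix and the shared
	// "z, w" suffix, leaving Edit(1, 2, 1, 2), a single-element REPLACE.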
}
// File: SimilarityIndex.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectStream;
/**
 * Wraps a {@link org.eclipse.jgit.diff.Sequence} to have a narrower range of
 * elements.
 *
 * This sequence acts as a proxy for the real sequence, translating element
 * indexes on the fly by adding {@code begin} to them. Sequences of this type
 * must be used with a {@link org.eclipse.jgit.diff.SubsequenceComparator}.
 *
 * @param <S>
 *            the base sequence type.
 */
public final class Subsequence<S extends Sequence> extends Sequence {
/**
* Construct a subsequence around the A region/base sequence.
*
	 * @param <S>
* type of returned Sequence
* @param a
* the A sequence.
* @param region
* the region of {@code a} to create a subsequence around.
* @return subsequence of {@code base} as described by A in {@code region}.
*/
	public static <S extends Sequence> Subsequence<S> a(S a, Edit region) {
return new Subsequence<>(a, region.beginA, region.endA);
}
/**
* Construct a subsequence around the B region/base sequence.
*
	 * @param <S>
* type of returned Sequence
* @param b
* the B sequence.
* @param region
* the region of {@code b} to create a subsequence around.
* @return subsequence of {@code base} as described by B in {@code region}.
*/
	public static <S extends Sequence> Subsequence<S> b(S b, Edit region) {
return new Subsequence<>(b, region.beginB, region.endB);
}
/**
* Adjust the Edit to reflect positions in the base sequence.
*
	 * @param <S>
* type of returned Sequence
* @param e
* edit to adjust in-place. Prior to invocation the indexes are
* in terms of the two subsequences; after invocation the indexes
* are in terms of the base sequences.
* @param a
* the A sequence.
* @param b
* the B sequence.
*/
	public static <S extends Sequence> void toBase(Edit e, Subsequence<S> a,
			Subsequence<S> b) {
e.beginA += a.begin;
e.endA += a.begin;
e.beginB += b.begin;
e.endB += b.begin;
}
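	// Example: if subsequence a starts at base index 10 and subsequence b
	// starts at base index 30, an Edit(0, 2, 1, 3) computed against the
	// subsequences becomes Edit(10, 12, 31, 33) in terms of the base sequences.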
/**
* Adjust the Edits to reflect positions in the base sequence.
*
	 * @param <S>
* type of returned Sequence
* @param edits
* edits to adjust in-place. Prior to invocation the indexes are
* in terms of the two subsequences; after invocation the indexes
* are in terms of the base sequences.
* @param a
* the A sequence.
* @param b
* the B sequence.
* @return always {@code edits} (as the list was updated in-place).
*/
	public static <S extends Sequence> EditList toBase(EditList edits,
			Subsequence<S> a, Subsequence<S> b) {
for (Edit e : edits)
toBase(e, a, b);
return edits;
}
final S base;
final int begin;
private final int size;
/**
* Construct a subset of another sequence.
*
* The size of the subsequence will be {@code end - begin}.
*
* @param base
* the real sequence.
* @param begin
* First element index of {@code base} that will be part of this
* new subsequence. The element at {@code begin} will be this
* sequence's element 0.
* @param end
* One past the last element index of {@code base} that will be
* part of this new subsequence.
*/
public Subsequence(S base, int begin, int end) {
this.base = base;
this.begin = begin;
this.size = end - begin;
}
@Override
public int size() {
return size;
}
}
// File: SubsequenceComparator.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Wrap another comparator for use with
* {@link org.eclipse.jgit.diff.Subsequence}.
 *
 * This comparator acts as a proxy for the real comparator, translating element
 * indexes on the fly by adding the subsequence's begin offset to them.
 * Comparators of this type must be used with a
 * {@link org.eclipse.jgit.diff.Subsequence}.
 *
 * @param <S>
 *            the base sequence type.
 */
public final class SubsequenceComparator<S extends Sequence> extends
		SequenceComparator<Subsequence<S>> {
	private final SequenceComparator<? super S> cmp;
	@Override
	public boolean equals(Subsequence<S> a, int ai, Subsequence<S> b, int bi) {
return cmp.equals(a.base, ai + a.begin, b.base, bi + b.begin);
}
@Override
	public int hash(Subsequence<S> seq, int ptr) {
return cmp.hash(seq.base, ptr + seq.begin);
}
}
// File: package-info.java
/**
* Comparing file contents by computing diffs.
*/
package org.eclipse.jgit.diff;