/* * Copyright (C) 2010, 2021 Google Inc. and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at * https://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.diff; import java.io.BufferedInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.ObjectStream; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.WorkingTreeIterator; import org.eclipse.jgit.treewalk.filter.PathFilter; /** * Supplies the content of a file for * {@link org.eclipse.jgit.diff.DiffFormatter}. *
* A content source is not thread-safe. Sources may contain state, including * information about the last ObjectLoader they returned. Callers must be * careful to ensure there is no more than one ObjectLoader pending on any * source, at any time. */ public abstract class ContentSource { /** * Construct a content source for an ObjectReader. * * @param reader * the reader to obtain blobs from. * @return a source wrapping the reader. */ public static ContentSource create(ObjectReader reader) { return new ObjectReaderSource(reader); } /** * Construct a content source for a working directory. * * If the iterator is a {@link org.eclipse.jgit.treewalk.FileTreeIterator} * an optimized version is used that doesn't require seeking through a * TreeWalk. * * @param iterator * the iterator to obtain source files through. * @return a content source wrapping the iterator. */ public static ContentSource create(WorkingTreeIterator iterator) { return new WorkingTreeSource(iterator); } /** * Determine the size of the object. * * @param path * the path of the file, relative to the root of the repository. * @param id * blob id of the file, if known. * @return the size in bytes. * @throws java.io.IOException * the file cannot be accessed. */ public abstract long size(String path, ObjectId id) throws IOException; /** * Open the object. * * @param path * the path of the file, relative to the root of the repository. * @param id * blob id of the file, if known. * @return a loader that can supply the content of the file. The loader must * be used before another loader can be obtained from this same * source. * @throws java.io.IOException * the file cannot be accessed. */ public abstract ObjectLoader open(String path, ObjectId id) throws IOException; /** * Closes the used resources like ObjectReader, TreeWalk etc. Default * implementation does nothing. * * @since 6.2 */ public void close() { // Do nothing } /** * Checks if the source is from "working tree", so it can be accessed as a * file directly. 
* * @since 6.2 * * @return true if working tree source and false otherwise (loader must be * used) */ public boolean isWorkingTreeSource() { return false; } private static class ObjectReaderSource extends ContentSource { private final ObjectReader reader; ObjectReaderSource(ObjectReader reader) { this.reader = reader; } @Override public long size(String path, ObjectId id) throws IOException { try { return reader.getObjectSize(id, Constants.OBJ_BLOB); } catch (MissingObjectException ignore) { return 0; } } @Override public ObjectLoader open(String path, ObjectId id) throws IOException { return reader.open(id, Constants.OBJ_BLOB); } @Override public void close() { reader.close(); } @Override public boolean isWorkingTreeSource() { return false; } } private static class WorkingTreeSource extends ContentSource { private final TreeWalk tw; private final WorkingTreeIterator iterator; private String current; WorkingTreeIterator ptr; WorkingTreeSource(WorkingTreeIterator iterator) { this.tw = new TreeWalk(iterator.getRepository(), (ObjectReader) null); this.tw.setRecursive(true); this.iterator = iterator; } @Override public long size(String path, ObjectId id) throws IOException { seek(path); return ptr.getEntryLength(); } @Override public ObjectLoader open(String path, ObjectId id) throws IOException { seek(path); long entrySize = ptr.getEntryContentLength(); return new ObjectLoader() { @Override public long getSize() { return entrySize; } @Override public int getType() { return ptr.getEntryFileMode().getObjectType(); } @Override public ObjectStream openStream() throws MissingObjectException, IOException { long contentLength = entrySize; InputStream in = ptr.openEntryStream(); in = new BufferedInputStream(in); return new ObjectStream.Filter(getType(), contentLength, in); } @Override public boolean isLarge() { return true; } @Override public byte[] getCachedBytes() throws LargeObjectException { throw new LargeObjectException(); } }; } private void seek(String path) throws IOException { if (!path.equals(current)) { iterator.reset(); // Possibly this iterator had an associated DirCacheIterator, // but we have no access to it and thus don't know about it. // We have to reset this iterator here to work without // DirCacheIterator and to descend always into ignored // directories. Otherwise we might not find tracked files below // ignored folders. Since we're looking only for a single // specific path this is not a performance problem. iterator.setWalkIgnoredDirectories(true); iterator.setDirCacheIterator(null, -1); tw.reset(); tw.addTree(iterator); tw.setFilter(PathFilter.create(path)); current = path; if (!tw.next()) throw new FileNotFoundException(path); ptr = tw.getTree(0, WorkingTreeIterator.class); if (ptr == null) throw new FileNotFoundException(path); } } @Override public void close() { tw.close(); } @Override public boolean isWorkingTreeSource() { return true; } } /** A pair of sources to access the old and new sides of a DiffEntry. */ public static final class Pair { private final ContentSource oldSource; private final ContentSource newSource; /** * Construct a pair of sources. * * @param oldSource * source to read the old side of a DiffEntry. * @param newSource * source to read the new side of a DiffEntry. */ public Pair(ContentSource oldSource, ContentSource newSource) { this.oldSource = oldSource; this.newSource = newSource; } /** * Determine the size of the object. * * @param side * which side of the entry to read (OLD or NEW). * @param ent * the entry to examine. 
* @return the size in bytes. * @throws IOException * the file cannot be accessed. */ public long size(DiffEntry.Side side, DiffEntry ent) throws IOException { switch (side) { case OLD: return oldSource.size(ent.oldPath, ent.oldId.toObjectId()); case NEW: return newSource.size(ent.newPath, ent.newId.toObjectId()); default: throw new IllegalArgumentException(); } } /** * Open the object. * * @param side * which side of the entry to read (OLD or NEW). * @param ent * the entry to examine. * @return a loader that can supply the content of the file. The loader * must be used before another loader can be obtained from this * same source. * @throws IOException * the file cannot be accessed. */ public ObjectLoader open(DiffEntry.Side side, DiffEntry ent) throws IOException { switch (side) { case OLD: return oldSource.open(ent.oldPath, ent.oldId.toObjectId()); case NEW: return newSource.open(ent.newPath, ent.newId.toObjectId()); default: throw new IllegalArgumentException(); } } /** * Closes used resources. * * @since 6.2 */ public void close() { oldSource.close(); newSource.close(); } /** * Checks if source (side) is a "working tree". * * @since 6.2 * * @param side * which side of the entry to read (OLD or NEW). * @return is the source a "working tree" * */ public boolean isWorkingTreeSource(DiffEntry.Side side) { switch (side) { case OLD: return oldSource.isWorkingTreeSource(); case NEW: return newSource.isWorkingTreeSource(); default: throw new IllegalArgumentException(); } } } }
/* * Copyright (C) 2010, Google Inc. and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at * https://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.diff; /** * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an * {@link org.eclipse.jgit.diff.EditList} of changes. *
* An algorithm's {@code diff} method must be callable from concurrent threads
* without data collisions. This permits some algorithms to use a singleton
* pattern, with concurrent invocations using the same singleton. Other
* algorithms may support parameterization, in which case the caller can create
* a unique instance per thread.
*/
public abstract class DiffAlgorithm {
/**
* Supported diff algorithm
*/
public enum SupportedAlgorithm {
/**
* Myers diff algorithm
*/
MYERS,
/**
* Histogram diff algorithm
*/
HISTOGRAM
}
/**
* Get diff algorithm
*
* @param alg
* the diff algorithm for which an implementation should be
* returned
* @return an implementation of the specified diff algorithm
*/
public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
switch (alg) {
case MYERS:
return MyersDiff.INSTANCE;
case HISTOGRAM:
return new HistogramDiff();
default:
throw new IllegalArgumentException();
}
}
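	/*
	 * A minimal usage sketch, assuming the compared content is line-oriented
	 * text already held in memory (the byte arrays and values below are
	 * illustrative, not part of this class; StandardCharsets is
	 * java.nio.charset.StandardCharsets):
	 *
	 *   byte[] oldBytes = "a\nb\nc\n".getBytes(StandardCharsets.UTF_8);
	 *   byte[] newBytes = "a\nx\nc\n".getBytes(StandardCharsets.UTF_8);
	 *   DiffAlgorithm alg = DiffAlgorithm
	 *       .getAlgorithm(SupportedAlgorithm.HISTOGRAM);
	 *   EditList edits = alg.diff(RawTextComparator.DEFAULT,
	 *       new RawText(oldBytes), new RawText(newBytes));
	 *   // edits holds one REPLACE edit covering line index 1 on both sides
	 */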
/**
* Compare two sequences and identify a list of edits between them.
*
* @param <S>
* type of sequence being compared.
* @param cmp
* the comparator supplying the element equivalence function.
* @param a
* the first (also known as old or pre-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
* {@link org.eclipse.jgit.diff.Edit#getEndA()}.
* @param b
* the second (also known as new or post-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
* {@link org.eclipse.jgit.diff.Edit#getEndB()}.
* @return a modifiable edit list comparing the two sequences. If empty, the
* sequences are identical according to {@code cmp}'s rules. The
* result list is never null.
*/
public <S extends Sequence> EditList diff(
		SequenceComparator<? super S> cmp, S a, S b) {
Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
switch (region.getType()) {
case INSERT:
case DELETE:
return EditList.singleton(region);
case REPLACE: {
if (region.getLengthA() == 1 && region.getLengthB() == 1)
return EditList.singleton(region);
SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp);
Subsequence<S> as = Subsequence.a(a, region);
Subsequence<S> bs = Subsequence.b(b, region);
EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
return normalize(cmp, e, a, b);
}
case EMPTY:
return new EditList(0);
default:
throw new IllegalStateException();
}
}
private static <S extends Sequence> Edit coverEdit(S a, S b) {
return new Edit(0, a.size(), 0, b.size());
}
/**
* Reorganize an {@link EditList} for better diff consistency.
*
* {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
* {@link Edit.Type#DELETE} edits that can be "shifted". For
* example, the deleted section
* <pre>
* -a
* -b
* -c
*  a
*  b
*  c
* </pre>
* can be shifted down by 1, 2 or 3 locations.
*
* To avoid later merge issues, we shift such edits to a
* consistent location. {@code normalize} uses a simple strategy of
* shifting such edits to their latest possible location.
*
* This strategy may not always produce an aesthetically pleasing
* diff. For instance, it works well with
* <pre>
* function1 {
*  ...
* }
*
* +function2 {
* + ...
* +}
* +
* function3 {
*  ...
* }
* </pre>
* but less so for
* <pre>
*  #
*  # comment1
*  #
*  function1() {
*  }
*
*  #
* +# comment3
* +#
* +function3() {
* +}
* +
* +#
*  # comment2
*  #
*  function2() {
*  }
* </pre>
* More sophisticated strategies are possible, say by calculating a
* suitable "aesthetic cost" for each possible position and using
* the lowest cost, but {@code normalize} just shifts edits
* to the end as much as possible.
*
* @param
* When writing or updating patterns, assume the contents are syntactically
* correct. Patterns can be simple and need not cover all syntactical corner
* cases, as long as they are sufficiently permissive.
*
* @since 6.10.1
*/
@SuppressWarnings({"ImmutableEnumChecker", "nls"})
public enum DiffDriver {
/**
* Built-in diff driver for c++
*/
cpp(List.of(
/* Jump targets or access declarations */
"^[ \\t]*[A-Za-z_][A-Za-z_0-9]*:\\s*($|/[/*])"), List.of(
/* functions/methods, variables, and compounds at top level */
"^((::\\s*)?[A-Za-z_].*)$")),
/**
* Built-in diff driver for device
* tree files
*/
dts(List.of(";", "="), List.of(
/* lines beginning with a word optionally preceded by '&' or the root */
"^[ \\t]*((/[ \\t]*\\{|&?[a-zA-Z_]).*)")),
/**
* Built-in diff driver for java
*/
java(List.of(
"^[ \\t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)"),
List.of(
/* Class, enum, interface, and record declarations */
"^[ \\t]*(([a-z-]+[ \\t]+)*(class|enum|interface|record)[ \\t]+.*)$",
/* Method definitions; note that constructor signatures are not */
/* matched because they are indistinguishable from method calls. */
"^[ \\t]*(([A-Za-z_<>&\\]\\[][?&<>.,A-Za-z_0-9]*[ \\t]+)+[A-Za-z_]"
+ "[A-Za-z_0-9]*[ \\t]*\\([^;]*)$")),
/**
* Built-in diff driver for
* python
*/
python(List.of("^[ \\t]*((class|(async[ \\t]+)?def)[ \\t].*)$")),
/**
* Built-in diff driver for
* rust
*/
rust(List.of("^[\\t ]*((pub(\\([^\\)]+\\))?[\\t ]+)?"
+ "((async|const|unsafe|extern([\\t ]+\"[^\"]+\"))[\\t ]+)?"
+ "(struct|enum|union|mod|trait|fn|impl|macro_rules!)[< \\t]+[^;]*)$"));
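	/*
	 * A sketch of how one of these hunk header patterns behaves, using the
	 * java driver's declaration regex with plain java.util.regex (the sample
	 * lines are illustrative):
	 *
	 *   Pattern decl = Pattern.compile(
	 *       "^[ \\t]*(([a-z-]+[ \\t]+)*(class|enum|interface|record)[ \\t]+.*)$");
	 *   decl.matcher("public class RenameDetector {").find();   // true
	 *   decl.matcher("    int score = 0;").find();              // false
	 */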
private final List
* The meaning of the old name can differ depending on the semantic meaning
* of this patch:
*
* The meaning of the new name can differ depending on the semantic meaning
* of this patch:
*
* Note that 0 corresponds to filterA because it was the first filter that
* was passed to scan.
*
* To query more than one flag at once, see {@link #getTreeFilterMarks()}.
*
* @param index
* the index of the tree filter to check for (must be between 0
* and {@link java.lang.Integer#SIZE}).
* @since 2.3
* @return a boolean.
*/
public boolean isMarked(int index) {
return (treeFilterMarks & (1L << index)) != 0;
}
/**
* Get the raw tree filter marks, as set during
* {@link #scan(TreeWalk, boolean, TreeFilter...)}. See
* {@link #isMarked(int)} to query each mark individually.
*
* @return the bitset of tree filter marks
* @since 2.3
*/
public int getTreeFilterMarks() {
return treeFilterMarks;
}
/**
* Get the object id.
*
* @param side
* the side of the id to get.
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getId(Side side) {
return side == Side.OLD ? getOldId() : getNewId();
}
@SuppressWarnings("nls")
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append("DiffEntry[");
buf.append(changeType);
buf.append(" ");
switch (changeType) {
case ADD:
buf.append(newPath);
break;
case COPY:
buf.append(oldPath + "->" + newPath);
break;
case DELETE:
buf.append(oldPath);
break;
case MODIFY:
buf.append(oldPath);
break;
case RENAME:
buf.append(oldPath + "->" + newPath);
break;
}
buf.append("]");
return buf.toString();
}
}
/*
* Copyright (C) 2009, Google Inc.
* Copyright (C) 2008-2020, Johannes E. Schindelin
* By default the setting of git config {@code core.quotePath} is active,
* but this can be overridden through this method.
*
* Release the internal ObjectReader state.
*
* @since 4.0
*/
@Override
public void close() {
if (reader != null && closeReader) {
reader.close();
}
}
/**
* Determine the differences between two trees.
*
* No output is created; instead, only the file paths that are different are
* returned. Callers may choose to format these paths themselves, or convert
* them into {@link org.eclipse.jgit.patch.FileHeader} instances with a
* complete edit list by calling {@link #toFileHeader(DiffEntry)}.
*
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @return the paths that are different.
* @throws java.io.IOException
* trees cannot be read or file contents cannot be read.
*/
public List
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @return the paths that are different.
* @throws java.io.IOException
* trees cannot be read or file contents cannot be read.
*/
public List
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @throws java.io.IOException
* trees cannot be read, file contents cannot be read, or the
* patch cannot be output.
*/
public void format(AnyObjectId a, AnyObjectId b) throws IOException {
format(scan(a, b));
}
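	/*
	 * A typical end-to-end sketch, assuming an open Repository named repo and
	 * two resolved commit or tree ids oldId and newId (all three are
	 * hypothetical placeholders here):
	 *
	 *   try (DiffFormatter fmt = new DiffFormatter(System.out)) {
	 *       fmt.setRepository(repo);
	 *       List<DiffEntry> entries = fmt.scan(oldId, newId);
	 *       fmt.format(entries); // writes a unified diff to System.out
	 *   }
	 */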
/**
* Format the differences between two trees.
*
* The patch is expressed as instructions to modify {@code a} to make it
* {@code b}.
*
*
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @throws java.io.IOException
* trees cannot be read, file contents cannot be read, or the
* patch cannot be output.
*/
public void format(RevTree a, RevTree b) throws IOException {
format(scan(a, b));
}
/**
* Format the differences between two trees.
*
* The patch is expressed as instructions to modify {@code a} to make it
* {@code b}.
*
* Either side may be null to indicate that the tree has been added or
* removed. The diff will be computed against nothing.
*
* @param a
* the old (or previous) side or null
* @param b
* the new (or updated) side or null
* @throws java.io.IOException
* trees cannot be read, file contents cannot be read, or the
* patch cannot be output.
*/
public void format(AbstractTreeIterator a, AbstractTreeIterator b)
throws IOException {
format(scan(a, b));
}
/**
* Format a patch script from a list of difference entries. Requires
* {@link #scan(AbstractTreeIterator, AbstractTreeIterator)} to have been
* called first.
*
* @param entries
* entries describing the affected files.
* @throws java.io.IOException
* a file's content cannot be read, or the output stream cannot
* be written to.
*/
public void format(List<? extends DiffEntry> entries) throws IOException {
for (DiffEntry ent : entries)
format(ent);
}
/**
* Format a patch script for one file entry.
*
* @param ent
* the entry to be formatted.
* @throws java.io.IOException
* a file's content cannot be read, or the output stream cannot
* be written to.
*/
public void format(DiffEntry ent) throws IOException {
FormatResult res = createFormatResult(ent);
format(res.header, res.a, res.b, getDiffDriver(ent));
}
private static byte[] writeGitLinkText(AbbreviatedObjectId id) {
if (ObjectId.zeroId().equals(id.toObjectId())) {
return EMPTY;
}
return encodeASCII("Subproject commit " + id.name() //$NON-NLS-1$
+ "\n"); //$NON-NLS-1$
}
private String format(AbbreviatedObjectId id) {
if (id.isComplete() && reader != null) {
try {
id = reader.abbreviate(id.toObjectId(), abbreviationLength);
} catch (IOException cannotAbbreviate) {
// Ignore this. We'll report the full identity.
}
}
return id.name();
}
private String quotePath(String path) {
if (quotePaths == null || quotePaths.booleanValue()) {
return QuotedString.GIT_PATH.quote(path);
}
return QuotedString.GIT_PATH_MINIMAL.quote(path);
}
/**
* Format a patch script, reusing a previously parsed FileHeader.
*
* This formatter is primarily useful for editing an existing patch script
* to increase or reduce the number of lines of context within the script.
* All header lines are reused as-is from the supplied FileHeader.
*
* @param head
* existing file header containing the header lines to copy.
* @param a
* text source for the pre-image version of the content. This
* must match the content of
* {@link org.eclipse.jgit.patch.FileHeader#getOldId()}.
* @param b
* text source for the post-image version of the content. This
* must match the content of
* {@link org.eclipse.jgit.patch.FileHeader#getNewId()}.
* @param diffDriver
* the diff driver used to obtain function names in hunk headers
* @throws java.io.IOException
* writing to the supplied stream failed.
* @since 6.10.1
*/
public void format(FileHeader head, RawText a, RawText b,
DiffDriver diffDriver) throws IOException {
// Reuse the existing FileHeader as-is by blindly copying its
// header lines, but avoiding its hunks. Instead we recreate
// the hunks from the text instances we have been supplied.
//
final int start = head.getStartOffset();
int end = head.getEndOffset();
if (!head.getHunks().isEmpty())
end = head.getHunks().get(0).getStartOffset();
out.write(head.getBuffer(), start, end - start);
if (head.getPatchType() == PatchType.UNIFIED) {
format(head.toEditList(), a, b, diffDriver);
}
}
/**
* Format a patch script, reusing a previously parsed FileHeader.
*
* This formatter is primarily useful for editing an existing patch script
* to increase or reduce the number of lines of context within the script.
* All header lines are reused as-is from the supplied FileHeader.
*
* @param head
* existing file header containing the header lines to copy.
* @param a
* text source for the pre-image version of the content. This must match
* the content of {@link org.eclipse.jgit.patch.FileHeader#getOldId()}.
* @param b
* text source for the post-image version of the content. This must match
* the content of {@link org.eclipse.jgit.patch.FileHeader#getNewId()}.
* @throws java.io.IOException
* writing to the supplied stream failed.
*/
public void format(FileHeader head, RawText a, RawText b)
throws IOException {
format(head, a, b, null);
}
/**
* Formats a list of edits in unified diff format
*
* @param edits
* some differences which have been calculated between A and B
* @param a
* the text A which was compared
* @param b
* the text B which was compared
* @throws java.io.IOException
* if an IO error occurred
*/
public void format(EditList edits, RawText a, RawText b)
throws IOException {
format(edits, a, b, null);
}
/**
* Formats a list of edits in unified diff format
*
* @param edits
* some differences which have been calculated between A and B
* @param a
* the text A which was compared
* @param b
* the text B which was compared
* @param diffDriver
* the diff driver used to obtain function names in hunk headers
* @throws java.io.IOException
* if an IO error occurred
* @since 6.10.1
*/
public void format(EditList edits, RawText a, RawText b,
DiffDriver diffDriver) throws IOException {
for (int curIdx = 0; curIdx < edits.size();) {
Edit curEdit = edits.get(curIdx);
final int endIdx = findCombinedEnd(edits, curIdx);
final Edit endEdit = edits.get(endIdx);
int aCur = (int) Math.max(0, (long) curEdit.getBeginA() - context);
int bCur = (int) Math.max(0, (long) curEdit.getBeginB() - context);
final int aEnd = (int) Math.min(a.size(), (long) endEdit.getEndA() + context);
final int bEnd = (int) Math.min(b.size(), (long) endEdit.getEndB() + context);
writeHunkHeader(aCur, aEnd, bCur, bEnd,
getFuncName(a, aCur - 1, diffDriver));
while (aCur < aEnd || bCur < bEnd) {
if (aCur < curEdit.getBeginA() || endIdx + 1 < curIdx) {
writeContextLine(a, aCur);
if (isEndOfLineMissing(a, aCur))
out.write(noNewLine);
aCur++;
bCur++;
} else if (aCur < curEdit.getEndA()) {
writeRemovedLine(a, aCur);
if (isEndOfLineMissing(a, aCur))
out.write(noNewLine);
aCur++;
} else if (bCur < curEdit.getEndB()) {
writeAddedLine(b, bCur);
if (isEndOfLineMissing(b, bCur))
out.write(noNewLine);
bCur++;
}
if (end(curEdit, aCur, bCur) && ++curIdx < edits.size())
curEdit = edits.get(curIdx);
}
}
}
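	/*
	 * A sketch of formatting a raw EditList without any tree scanning,
	 * assuming a and b are the RawText inputs that were compared and out is
	 * any OutputStream (all illustrative):
	 *
	 *   EditList edits = new HistogramDiff().diff(RawTextComparator.DEFAULT, a, b);
	 *   try (DiffFormatter fmt = new DiffFormatter(out)) {
	 *       fmt.format(edits, a, b); // hunk headers and context come from a and b
	 *   }
	 */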
/**
* Output a line of context (unmodified line).
*
* @param text
* RawText for accessing raw data
* @param line
* the line number within text
* @throws java.io.IOException
* if an IO error occurred
*/
protected void writeContextLine(RawText text, int line)
throws IOException {
writeLine(' ', text, line);
}
private static boolean isEndOfLineMissing(RawText text, int line) {
return line + 1 == text.size() && text.isMissingNewlineAtEnd();
}
/**
* Output an added line.
*
* @param text
* RawText for accessing raw data
* @param line
* the line number within text
* @throws java.io.IOException
* if an IO error occurred
*/
protected void writeAddedLine(RawText text, int line)
throws IOException {
writeLine('+', text, line);
}
/**
* Output a removed line
*
* @param text
* RawText for accessing raw data
* @param line
* the line number within text
* @throws java.io.IOException
* if an IO error occurred
*/
protected void writeRemovedLine(RawText text, int line)
throws IOException {
writeLine('-', text, line);
}
/**
* Output a hunk header
*
* @param aStartLine
* within first source
* @param aEndLine
* within first source
* @param bStartLine
* within second source
* @param bEndLine
* within second source
* @throws java.io.IOException
* if an IO error occurred
*/
protected void writeHunkHeader(int aStartLine, int aEndLine, int bStartLine,
int bEndLine) throws IOException {
writeHunkHeader(aStartLine, aEndLine, bStartLine, bEndLine, null);
}
/**
* Output a hunk header
*
* @param aStartLine
* within first source
* @param aEndLine
* within first source
* @param bStartLine
* within second source
* @param bEndLine
* within second source
* @param funcName
* function name of this hunk
* @throws java.io.IOException
* if an IO error occurred
* @since 6.10.1
*/
protected void writeHunkHeader(int aStartLine, int aEndLine, int bStartLine,
int bEndLine, String funcName) throws IOException {
out.write('@');
out.write('@');
writeRange('-', aStartLine + 1, aEndLine - aStartLine);
writeRange('+', bStartLine + 1, bEndLine - bStartLine);
out.write(' ');
out.write('@');
out.write('@');
if (funcName != null) {
out.write(' ');
out.write(funcName.getBytes());
}
out.write('\n');
}
private void writeRange(char prefix, int begin, int cnt)
throws IOException {
out.write(' ');
out.write(prefix);
switch (cnt) {
case 0:
// If the range is empty, its beginning number must be the
// line just before the range, or 0 if the range is at the
// start of the file stream. Here, begin is always 1 based,
// so an empty file would produce "0,0".
//
out.write(encodeASCII(begin - 1));
out.write(',');
out.write('0');
break;
case 1:
// If the range is exactly one line, produce only the number.
//
out.write(encodeASCII(begin));
break;
default:
out.write(encodeASCII(begin));
out.write(',');
out.write(encodeASCII(cnt));
break;
}
}
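	/*
	 * Worked example for the two methods above: a hunk covering lines [0,3)
	 * of A and [0,4) of B, with a (hypothetical) function name, produces
	 *
	 *   writeHunkHeader(0, 3, 0, 4, "main()")  ->  "@@ -1,3 +1,4 @@ main()"
	 *
	 * and an empty range is printed against the line before it, e.g.
	 * writeRange('+', 1, 0) emits " +0,0".
	 */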
/**
* Write a standard patch script line.
*
* @param prefix
* prefix before the line, typically '-', '+', ' '.
* @param text
* the text object to obtain the line from.
* @param cur
* line number to output.
* @throws java.io.IOException
* the stream threw an exception while writing to it.
*/
protected void writeLine(final char prefix, final RawText text,
final int cur) throws IOException {
out.write(prefix);
text.writeLine(out, cur);
out.write('\n');
}
/**
* Creates a {@link org.eclipse.jgit.patch.FileHeader} representing the
* given {@link org.eclipse.jgit.diff.DiffEntry}
*
* This method does not use the OutputStream associated with this
* DiffFormatter instance. It is therefore safe to instantiate this
* DiffFormatter instance with a
* {@link org.eclipse.jgit.util.io.DisabledOutputStream} if this method is
* the only one that will be used.
*
* @param ent
* the DiffEntry to create the FileHeader for
* @return a FileHeader representing the DiffEntry. The FileHeader's buffer
* will contain only the header of the diff output. It will also
* contain one {@link org.eclipse.jgit.patch.HunkHeader}.
* @throws java.io.IOException
* the stream threw an exception while writing to it, or one of
* the blobs referenced by the DiffEntry could not be read.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* one of the blobs referenced by the DiffEntry is corrupt.
* @throws org.eclipse.jgit.errors.MissingObjectException
* one of the blobs referenced by the DiffEntry is missing.
*/
public FileHeader toFileHeader(DiffEntry ent) throws IOException,
CorruptObjectException, MissingObjectException {
return createFormatResult(ent).header;
}
private static class FormatResult {
FileHeader header;
RawText a;
RawText b;
}
private FormatResult createFormatResult(DiffEntry ent) throws IOException,
CorruptObjectException, MissingObjectException {
final FormatResult res = new FormatResult();
ByteArrayOutputStream buf = new ByteArrayOutputStream();
final EditList editList;
final FileHeader.PatchType type;
formatHeader(buf, ent);
if (ent.getOldId() == null || ent.getNewId() == null) {
// Content not changed (e.g. only mode, pure rename)
editList = new EditList();
type = PatchType.UNIFIED;
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
assertHaveReader();
RawText aRaw = null;
RawText bRaw = null;
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
aRaw = new RawText(writeGitLinkText(ent.getOldId()));
bRaw = new RawText(writeGitLinkText(ent.getNewId()));
} else {
try {
aRaw = open(OLD, ent);
bRaw = open(NEW, ent);
} catch (BinaryBlobException e) {
// Do nothing; we check for null below.
formatOldNewPaths(buf, ent);
buf.write(encodeASCII("Binary files differ\n")); //$NON-NLS-1$
editList = new EditList();
type = PatchType.BINARY;
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
}
res.a = aRaw;
res.b = bRaw;
editList = diff(res.a, res.b);
type = PatchType.UNIFIED;
switch (ent.getChangeType()) {
case RENAME:
case COPY:
if (!editList.isEmpty())
formatOldNewPaths(buf, ent);
break;
default:
formatOldNewPaths(buf, ent);
break;
}
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
private EditList diff(RawText a, RawText b) {
return diffAlgorithm.diff(comparator, a, b);
}
private void assertHaveReader() {
if (reader == null) {
throw new IllegalStateException(JGitText.get().readerIsRequired);
}
}
private RawText open(DiffEntry.Side side, DiffEntry entry)
throws IOException, BinaryBlobException {
if (entry.getMode(side) == FileMode.MISSING)
return RawText.EMPTY_TEXT;
if (entry.getMode(side).getObjectType() != Constants.OBJ_BLOB)
return RawText.EMPTY_TEXT;
AbbreviatedObjectId id = entry.getId(side);
if (!id.isComplete()) {
Collection
* An edit covers the modified region only. It does not cover a common region.
*
* Regions should be specified using 0 based notation, so add 1 to the start and
* end marks for line numbers in a file.
*
* An edit where {@code beginA == endA && beginB < endB} is an insert edit, that
* is sequence B inserted the elements in region
* An edit where {@code beginA < endA && beginB == endB} is a delete edit, that
* is sequence B has removed the elements between
* An edit where {@code beginA < endA && beginB < endB} is a replace edit, that
* is sequence B has replaced the range of elements between
*
* This sequence acts as a proxy for the real sequence, caching element hash
* codes so they don't need to be recomputed each time. Sequences of this type
* must be used with a {@link org.eclipse.jgit.diff.HashedSequenceComparator}.
*
* To construct an instance of this type use
* {@link org.eclipse.jgit.diff.HashedSequencePair}.
*
* @param
* This comparator acts as a proxy for the real comparator, evaluating the
* cached hash code before testing the underlying comparator's equality.
* Comparators of this type must be used with a
* {@link org.eclipse.jgit.diff.HashedSequence}.
*
* To construct an instance of this type use
* {@link org.eclipse.jgit.diff.HashedSequencePair}.
*
* @param
* This pair wraps two sequences that contain cached hash codes for the input
* sequences.
*
* @param
* This implementation was derived by using the 4 rules that are outlined in
* Bram Cohen's blog,
* and then was further extended to support low-occurrence common elements.
*
* The basic idea of the algorithm is to create a histogram of occurrences for
* each element of sequence A. Each element of sequence B is then considered in
* turn. If the element also exists in sequence A, and has a lower occurrence
* count, the positions are considered as a candidate for the longest common
* subsequence (LCS). After scanning of B is complete the LCS that has the
* lowest number of occurrences is chosen as a split point. The region is split
* around the LCS, and the algorithm is recursively applied to the sections
* before and after the LCS.
*
* By always selecting a LCS position with the lowest occurrence count, this
* algorithm behaves exactly like Bram Cohen's patience diff whenever there is a
* unique common element available between the two sequences. When no unique
* elements exist, the lowest occurrence element is chosen instead. This offers
* more readable diffs than simply falling back on the standard Myers' O(ND)
* algorithm would produce.
*
* To prevent the algorithm from having an O(N^2) running time, an upper limit
* on the number of unique elements in a histogram bucket is configured by
* {@link #setMaxChainLength(int)}. If sequence A has more than this many
* elements that hash into the same hash bucket, the algorithm passes the region
* to {@link #setFallbackAlgorithm(DiffAlgorithm)}. If no fallback algorithm is
* configured, the region is emitted as a replace edit.
*
* During scanning of sequence B, any element of A that occurs more than
* {@link #setMaxChainLength(int)} times is never considered for an LCS match
* position, even if it is common between the two sequences. This limits the
* number of locations in sequence A that must be considered to find the LCS,
* and helps maintain a lower running time bound.
*
* So long as {@link #setMaxChainLength(int)} is a small constant (such as 64),
* the algorithm runs in O(N * D) time, where N is the sum of the input lengths
* and D is the number of edits in the resulting EditList. If the supplied
* {@link org.eclipse.jgit.diff.SequenceComparator} has a good hash function,
* this implementation typically out-performs
* {@link org.eclipse.jgit.diff.MyersDiff}, even though its theoretical running
* time is the same.
*
* This implementation has an internal limitation that prevents it from handling
* sequences with more than 268,435,456 (2^28) elements.
*/
public class HistogramDiff extends LowLevelDiffAlgorithm {
/** Algorithm to use when there are too many element occurrences. */
DiffAlgorithm fallback = MyersDiff.INSTANCE;
/**
* Maximum number of positions to consider for a given element hash.
*
* All elements with the same hash are stored into a single chain. The chain
* size is capped to ensure search is linear time at O(len_A + len_B) rather
* than quadratic at O(len_A * len_B).
*/
int maxChainLength = 64;
/**
* Set the algorithm used when there are too many element occurrences.
*
* @param alg
* the secondary algorithm. If null the region will be denoted as
* a single REPLACE block.
*/
public void setFallbackAlgorithm(DiffAlgorithm alg) {
fallback = alg;
}
/**
* Maximum number of positions to consider for a given element hash.
*
* All elements with the same hash are stored into a single chain. The chain
* size is capped to ensure search is linear time at O(len_A + len_B) rather
* than quadratic at O(len_A * len_B).
*
* @param maxLen
* new maximum length.
*/
public void setMaxChainLength(int maxLen) {
maxChainLength = maxLen;
}
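	/*
	 * A configuration sketch, assuming a and b are previously built RawText
	 * inputs (illustrative); a null fallback makes oversized hash chains
	 * degrade to a single REPLACE edit instead of re-running Myers on the
	 * region:
	 *
	 *   HistogramDiff diff = new HistogramDiff();
	 *   diff.setFallbackAlgorithm(null);
	 *   diff.setMaxChainLength(64);
	 *   EditList edits = diff.diff(RawTextComparator.DEFAULT, a, b);
	 */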
@Override
public
* Each element in the range being considered is put into a hash table, tracking
* the number of times that distinct element appears in the sequence. Once all
* elements have been inserted from sequence A, each element of sequence B is
* probed in the hash table and the longest common subsequence with the lowest
* occurrence count in A is used as the result.
*
* @param
* The basic idea is to put the line numbers of text A as columns ("x") and the
* lines of text B as rows ("y"). Now you try to find the shortest "edit path"
* from the upper left corner to the lower right corner, where you can always go
* horizontally or vertically, but diagonally from (x,y) to (x+1,y+1) only if
* line x in text A is identical to line y in text B.
*
* Myers' fundamental concept is the "furthest reaching D-path on diagonal k": a
* D-path is an edit path starting at the upper left corner and containing
* exactly D non-diagonal elements ("differences"). The furthest reaching D-path
* on diagonal k is the one that contains the most (diagonal) elements which
* ends on diagonal k (where k = y - x).
*
* Example:
*
*
* Since every D-path has exactly D horizontal or vertical elements, it can only
* end on the diagonals -D, -D+2, ..., D-2, D.
*
* Since every furthest reaching D-path contains at least one furthest reaching
* (D-1)-path (except for D=0), we can construct them recursively.
*
* Since we are really interested in the shortest edit path, we can start
* looking for a 0-path, then a 1-path, and so on, until we find a path that
* ends in the lower right corner.
*
* To save space, we do not need to store all paths (which has quadratic space
* requirements), but generate the D-paths simultaneously from both sides. When
* the ends meet, we will have found "the middle" of the path. From the end
* points of that diagonal part, we can generate the rest recursively.
*
* This only requires linear space.
*
* The overall (runtime) complexity is:
*
*
* (With each step, we have to find the middle parts of twice as many regions as
* before, but the regions (as well as the D) are halved.)
*
* So the overall runtime complexity stays the same with linear space, albeit
* with a larger constant factor.
*
* @param
* Elements of the sequence are the lines of the file, as delimited by the UNIX
* newline character ('\n'). The file content is treated as 8 bit binary text,
* with no assumptions or requirements on character encoding.
*
* Note that the first line of the file is element 0, as defined by the Sequence
* interface API. Traditionally, in a text editor or a patch file, the first line is
* line number 1. Callers may need to subtract 1 prior to invoking methods if
* they are converting from "line number" to "element index".
*/
public class RawText extends Sequence {
/** A RawText of length 0 */
public static final RawText EMPTY_TEXT = new RawText(new byte[0]);
/**
* Default and minimum for {@link #BUFFER_SIZE}.
*/
private static final int FIRST_FEW_BYTES = 8 * 1024;
/**
* Number of bytes to check for heuristics in {@link #isBinary(byte[])}.
*/
private static final AtomicInteger BUFFER_SIZE = new AtomicInteger(
FIRST_FEW_BYTES);
/** The file content for this sequence. */
protected final byte[] content;
/** Map of line number to starting position within {@link #content}. */
protected final IntList lines;
/**
* Create a new sequence from an existing content byte array.
*
* The entire array (indexes 0 through length-1) is used as the content.
*
* @param input
* the content array. The object retains a reference to this
* array, so it should be immutable.
*/
public RawText(byte[] input) {
this(input, RawParseUtils.lineMap(input, 0, input.length));
}
/**
* Create a new sequence from the existing content byte array and the line
* map indicating line boundaries.
*
* @param input
* the content array. The object retains a reference to this
* array, so it should be immutable.
* @param lineMap
* an array with 1-based offsets for the start of each line.
* The first and last entries should be {@link Integer#MIN_VALUE}
* and an offset one past the end of the last line, respectively.
* @since 5.0
*/
public RawText(byte[] input, IntList lineMap) {
content = input;
lines = lineMap;
}
/**
* Create a new sequence from a file.
*
* The entire file contents are used.
*
* @param file
* the text file.
* @throws java.io.IOException
* if Exceptions occur while reading the file
*/
public RawText(File file) throws IOException {
this(IO.readFully(file));
}
/**
* Get the raw content
*
* @return the raw, unprocessed content read.
* @since 4.11
*/
public byte[] getRawContent() {
return content;
}
/** @return total number of items in the sequence. */
@Override
public int size() {
// The line map is always 2 entries larger than the number of lines in
// the file. Index 0 is padded out/unused. The last index is the total
// length of the buffer, and acts as a sentinel.
//
return lines.size() - 2;
}
/**
* Write a specific line to the output stream, without its trailing LF.
*
* The specified line is copied as-is, with no character encoding
* translation performed.
*
* If the specified line ends with an LF ('\n'), the LF is not
* copied. It is up to the caller to write the LF, if desired, between
* output lines.
*
* @param out
* stream to copy the line data onto.
* @param i
* index of the line to extract. Note this is 0-based, so line
* number 1 is actually index 0.
* @throws java.io.IOException
* the stream write operation failed.
*/
public void writeLine(OutputStream out, int i)
throws IOException {
int start = getStart(i);
int end = getEnd(i);
if (content[end - 1] == '\n')
end--;
out.write(content, start, end - start);
}
/**
* Determine if the file ends with a LF ('\n').
*
* @return true if the last line has an LF; false otherwise.
*/
public boolean isMissingNewlineAtEnd() {
final int end = lines.get(lines.size() - 1);
if (end == 0)
return true;
return content[end - 1] != '\n';
}
/**
* Get the text for a single line.
*
* @param i
* index of the line to extract. Note this is 0-based, so line
* number 1 is actually index 0.
* @return the text for the line, without a trailing LF.
*/
public String getString(int i) {
return getString(i, i + 1, true);
}
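	/*
	 * A small behavioral sketch for the accessors above, assuming UTF-8 input
	 * (StandardCharsets is java.nio.charset.StandardCharsets):
	 *
	 *   RawText t = new RawText("a\nb\n".getBytes(StandardCharsets.UTF_8));
	 *   t.size();                  // 2
	 *   t.getString(0);            // "a", without the trailing LF
	 *   t.isMissingNewlineAtEnd(); // false, the content ends with '\n'
	 */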
/**
* Get the raw text for a single line.
*
* @param i
* index of the line to extract. Note this is 0-based, so line
* number 1 is actually index 0.
* @return the text for the line, without a trailing LF, as a
* {@link ByteBuffer} that is backed by a slice of the
* {@link #getRawContent() raw content}, with the buffer's position
* on the start of the line and the limit at the end.
* @since 5.12
*/
public ByteBuffer getRawString(int i) {
int s = getStart(i);
int e = getEnd(i);
if (e > 0 && content[e - 1] == '\n') {
e--;
}
return ByteBuffer.wrap(content, s, e - s);
}
/**
* Get the text for a region of lines.
*
* @param begin
* index of the first line to extract. Note this is 0-based, so
* line number 1 is actually index 0.
* @param end
* index of one past the last line to extract.
* @param dropLF
* if true the trailing LF ('\n') of the last returned line is
* dropped, if present.
* @return the text for lines {@code [begin, end)}.
*/
public String getString(int begin, int end, boolean dropLF) {
if (begin == end)
return ""; //$NON-NLS-1$
int s = getStart(begin);
int e = getEnd(end - 1);
if (dropLF && content[e - 1] == '\n')
e--;
return decode(s, e);
}
/**
* Decode a region of the text into a String.
*
* The default implementation of this method tries to guess the character
* set by considering UTF-8, the platform default, and falling back on
* ISO-8859-1 if neither of those can correctly decode the region given.
*
* @param start
* first byte of the content to decode.
* @param end
* one past the last byte of the content to decode.
* @return the region {@code [start, end)} decoded as a String.
*/
protected String decode(int start, int end) {
return RawParseUtils.decode(content, start, end);
}
private int getStart(int i) {
return lines.get(i + 1);
}
private int getEnd(int i) {
return lines.get(i + 2);
}
/**
* Obtains the buffer size to use for analyzing whether certain content is
* text or binary, or what line endings are used if it's text.
*
* @return the buffer size, by default {@link #FIRST_FEW_BYTES} bytes
* @since 6.0
*/
public static int getBufferSize() {
return BUFFER_SIZE.get();
}
/**
* Sets the buffer size to use for analyzing whether certain content is text
* or binary, or what line endings are used if it's text. If the given
* {@code bufferSize} is smaller than {@link #FIRST_FEW_BYTES} set the
* buffer size to {@link #FIRST_FEW_BYTES}.
*
* @param bufferSize
* Size to set
* @return the size actually set
* @since 6.0
*/
public static int setBufferSize(int bufferSize) {
int newSize = Math.max(FIRST_FEW_BYTES, bufferSize);
return BUFFER_SIZE.updateAndGet(curr -> newSize);
}
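	/*
	 * A sketch of the clamping behavior above (values are illustrative):
	 *
	 *   RawText.setBufferSize(1024);      // returns 8192, clamped to the minimum
	 *   RawText.setBufferSize(64 * 1024); // returns 65536
	 */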
/**
* Determine heuristically whether the bytes contained in a stream
* represents binary (as opposed to text) content.
*
* Note: Do not further use this stream after having called this method! The
* stream may not be fully read and will be left at an unknown position
* after consuming an unknown number of bytes. The caller is responsible for
* closing the stream.
*
* @param raw
* input stream containing the raw file content.
* @return true if raw is likely to be a binary file, false otherwise
* @throws java.io.IOException
* if input stream could not be read
*/
public static boolean isBinary(InputStream raw) throws IOException {
final byte[] buffer = new byte[getBufferSize() + 1];
int cnt = 0;
while (cnt < buffer.length) {
final int n = raw.read(buffer, cnt, buffer.length - cnt);
if (n == -1) {
break;
}
cnt += n;
}
return isBinary(buffer, cnt, cnt < buffer.length);
}
/**
* Determine heuristically whether a byte array represents binary (as
* opposed to text) content.
*
* @param raw
* the raw file content.
* @return true if raw is likely to be a binary file, false otherwise
*/
public static boolean isBinary(byte[] raw) {
return isBinary(raw, raw.length);
}
/**
* Determine heuristically whether a byte array represents binary (as
* opposed to text) content.
*
* @param raw
* the raw file content.
* @param length
* number of bytes in {@code raw} to evaluate. This should be
* {@code raw.length} unless {@code raw} was over-allocated by
* the caller.
* @return true if raw is likely to be a binary file, false otherwise
*/
public static boolean isBinary(byte[] raw, int length) {
return isBinary(raw, length, false);
}
/**
* Determine heuristically whether a byte array represents binary (as
* opposed to text) content.
*
* @param raw
* the raw file content.
* @param length
* number of bytes in {@code raw} to evaluate. This should be
* {@code raw.length} unless {@code raw} was over-allocated by
* the caller.
* @param complete
* whether {@code raw} contains the whole data
* @return true if raw is likely to be a binary file, false otherwise
* @since 6.0
*/
public static boolean isBinary(byte[] raw, int length, boolean complete) {
// Similar heuristic as C Git. Differences:
// - limited buffer size; may be only the beginning of a large blob
// - no counting of printable vs. non-printable bytes < 0x20 and 0x7F
int maxLength = getBufferSize();
boolean isComplete = complete;
if (length > maxLength) {
// We restrict the length in all cases to getBufferSize() to get
// predictable behavior. Sometimes we load streams, and sometimes we
// have the full data in memory. With streams, we never look at more
// than the first getBufferSize() bytes. If we looked at more when
// we have the full data, different code paths in JGit might come to
// different conclusions.
length = maxLength;
isComplete = false;
}
int ptr = -1;
byte current;
while (ptr < length - 2) {
current = raw[++ptr];
if (current == '\0' || (current == '\r' && raw[++ptr] != '\n')) {
return true;
}
}
if (ptr == length - 2) {
// If the last byte inspected is NUL, or a trailing CR in complete content, treat it as binary.
current = raw[++ptr];
return current == '\0' || (current == '\r' && isComplete);
}
return false;
}
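	/*
	 * A behavioral sketch for the heuristic above (inputs are illustrative;
	 * StandardCharsets is java.nio.charset.StandardCharsets):
	 *
	 *   isBinary(new byte[] { 'a', 0, 'b' }, 3, true);   // true: NUL byte found
	 *   byte[] text = "line one\nline two\n".getBytes(StandardCharsets.UTF_8);
	 *   isBinary(text, text.length, true);               // false: plain LF text
	 *   isBinary(new byte[] { 'a', '\r' }, 2, true);     // true: lone CR at the end
	 */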
/**
* Determines from the last two bytes read from a source if it looks like
* binary content.
*
* @param curr
* the last byte, read after {@code prev}
* @param prev
* the previous byte, read before {@code last}
* @return {@code true} if either byte is NUL, or if prev is CR and curr is
* not LF, {@code false} otherwise
* @since 6.0
*/
public static boolean isBinary(byte curr, byte prev) {
return curr == '\0' || (curr != '\n' && prev == '\r') || prev == '\0';
}
/**
* Determine heuristically whether a byte array represents text content
* using CR-LF as line separator.
*
* @param raw
* the raw file content.
* @return {@code true} if raw is likely to be CR-LF delimited text,
* {@code false} otherwise
* @since 5.3
*/
public static boolean isCrLfText(byte[] raw) {
return isCrLfText(raw, raw.length);
}
/**
* Determine heuristically whether the bytes contained in a stream represent
* text content using CR-LF as line separator.
*
* Note: Do not further use this stream after having called this method! The
* stream may not be fully read and will be left at an unknown position
* after consuming an unknown number of bytes. The caller is responsible for
* closing the stream.
*
* @param raw
* input stream containing the raw file content.
* @return {@code true} if raw is likely to be CR-LF delimited text,
* {@code false} otherwise
* @throws java.io.IOException
* if input stream could not be read
* @since 5.3
*/
public static boolean isCrLfText(InputStream raw) throws IOException {
byte[] buffer = new byte[getBufferSize()];
int cnt = 0;
while (cnt < buffer.length) {
int n = raw.read(buffer, cnt, buffer.length - cnt);
if (n == -1) {
break;
}
cnt += n;
}
return isCrLfText(buffer, cnt);
}
/**
* Determine heuristically whether a byte array represents text content
* using CR-LF as line separator.
*
* @param raw
* the raw file content.
* @param length
* number of bytes in {@code raw} to evaluate.
* @return {@code true} if raw is likely to be CR-LF delimited text,
* {@code false} otherwise
* @since 5.3
*/
public static boolean isCrLfText(byte[] raw, int length) {
return isCrLfText(raw, length, false);
}
/**
* Determine heuristically whether a byte array represents text content
* using CR-LF as line separator.
*
* @param raw
* the raw file content.
* @param length
* number of bytes in {@code raw} to evaluate.
* @return {@code true} if raw is likely to be CR-LF delimited text,
* {@code false} otherwise
* @param complete
* whether {@code raw} contains the whole data
* @since 6.0
*/
public static boolean isCrLfText(byte[] raw, int length, boolean complete) {
boolean has_crlf = false;
int ptr = -1;
byte current;
while (ptr < length - 2) {
current = raw[++ptr];
if (current == '\0') {
return false;
}
if (current == '\r') {
if (raw[++ptr] != '\n') {
return false;
}
has_crlf = true;
}
}
if (ptr == length - 2) {
// A trailing NUL, or a bare CR at the end of complete content, means this is not CR-LF text.
current = raw[++ptr];
if (current == '\0' || (current == '\r' && complete)) {
return false;
}
}
return has_crlf;
}
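	/*
	 * A behavioral sketch (illustrative inputs, treated as complete content;
	 * StandardCharsets is java.nio.charset.StandardCharsets):
	 *
	 *   byte[] crlf = "a\r\nb\r\n".getBytes(StandardCharsets.US_ASCII);
	 *   isCrLfText(crlf, crlf.length, true);   // true
	 *   byte[] lf = "a\nb\n".getBytes(StandardCharsets.US_ASCII);
	 *   isCrLfText(lf, lf.length, true);       // false: LF only, no CR-LF pairs
	 *   byte[] cr = "a\rb\n".getBytes(StandardCharsets.US_ASCII);
	 *   isCrLfText(cr, cr.length, true);       // false: bare CR is not CR-LF
	 */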
/**
* Get the line delimiter for the first line.
*
* @since 2.0
* @return the line delimiter or
* When comparing two files together their score must be greater than or
* equal to the rename score for them to be considered a rename match. The
* score is computed based on content similarity, so a score of 60 implies
* that approximately 60% of the bytes in the files are identical.
*
* @param score
* new rename score, must be within [0, 100].
* @throws java.lang.IllegalArgumentException
* the score was not within [0, 100].
*/
public void setRenameScore(int score) {
if (score < 0 || score > 100)
throw new IllegalArgumentException(
JGitText.get().similarityScoreMustBeWithinBounds);
renameScore = score;
}
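	/*
	 * A configuration sketch, assuming this setter belongs to
	 * org.eclipse.jgit.diff.RenameDetector and that repo and changes (a
	 * Repository and a List<DiffEntry> from DiffFormatter#scan) already
	 * exist:
	 *
	 *   RenameDetector rd = new RenameDetector(repo);
	 *   rd.setRenameScore(60);                   // require >= 60% similarity
	 *   rd.addAll(changes);
	 *   List<DiffEntry> resolved = rd.compute(); // ADD/DELETE pairs may become RENAMEs
	 */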
/**
* Get break score
*
* @return the similarity score required to keep modified file pairs
* together. Any modify pairs that score below this will be broken
* apart into separate add/deletes. Values less than or equal to
* zero indicate that no modifies will be broken apart. Values over
* 100 cause all modify pairs to be broken.
*/
public int getBreakScore() {
return breakScore;
}
/**
* Set break score
*
* @param breakScore
* the similarity score required to keep modified file pairs
* together. Any modify pairs that score below this will be
* broken apart into separate add/deletes. Values less than or
* equal to zero indicate that no modifies will be broken apart.
* Values over 100 cause all modify pairs to be broken.
*/
public void setBreakScore(int breakScore) {
this.breakScore = breakScore;
}
/**
* Get rename limit
*
* @return limit on number of paths to perform inexact rename detection
*/
public int getRenameLimit() {
return renameLimit;
}
/**
* Set the limit on the number of files to perform inexact rename detection.
*
* The rename detector has to build a square matrix of the rename limit on
* each side, then perform that many file compares to determine similarity.
* If 1000 files are added, and 1000 files are deleted, a 1000*1000 matrix
* must be allocated, and 1,000,000 file compares may need to be performed.
*
* @param limit
* new file limit. 0 means no limit; a negative number means no
* inexact rename detection will be performed, only exact rename
* detection.
*/
public void setRenameLimit(int limit) {
renameLimit = limit;
}
/**
* Get file size threshold for detecting renames. Files larger
* than this size will not be processed for rename detection.
*
* @return threshold in bytes of the file size.
* @since 5.12
*/
public int getBigFileThreshold() { return bigFileThreshold; }
/**
* Set the file size threshold for detecting renames. Files larger than this
* threshold will be skipped during rename detection computation.
*
* @param threshold file size threshold in bytes.
* @since 5.12
*/
public void setBigFileThreshold(int threshold) {
this.bigFileThreshold = threshold;
}
/**
* Get skipping detecting content renames for binary files.
*
* @return true if content renames should be skipped for binary files, false otherwise.
* @since 5.12
*/
public boolean getSkipContentRenamesForBinaryFiles() {
return skipContentRenamesForBinaryFiles;
}
/**
* Sets skipping detecting content renames for binary files.
*
* @param value true if content renames should be skipped for binary files, false otherwise.
* @since 5.12
*/
public void setSkipContentRenamesForBinaryFiles(boolean value) {
this.skipContentRenamesForBinaryFiles = value;
}
/**
* Check if the detector is over the rename limit.
*
* This method can be invoked either before or after {@code getEntries} has
* been used to perform rename detection.
*
* @return true if the detector has more file additions or removals than the
* rename limit is currently set to. In such configurations the
* detector will skip expensive computation.
*/
public boolean isOverRenameLimit() {
if (done)
return overRenameLimit;
int cnt = Math.max(added.size(), deleted.size());
return getRenameLimit() != 0 && getRenameLimit() < cnt;
}
/**
* Add entries to be considered for rename detection.
*
* @param entriesToAdd
* one or more entries to add.
* @throws java.lang.IllegalStateException
* if {@code getEntries} was already invoked.
*/
public void addAll(Collection
* This convenience function runs without a progress monitor.
*
* A sequence of elements is defined to contain elements in the index range
*
* Implementations of Sequence are primarily intended for use in content
* difference detection algorithms, to produce an
* {@link org.eclipse.jgit.diff.EditList} of {@link org.eclipse.jgit.diff.Edit}
* instances describing how two Sequence instances differ.
*
* To be compared against another Sequence of the same type, a supporting
* {@link org.eclipse.jgit.diff.SequenceComparator} must also be supplied.
*/
public abstract class Sequence {
/**
 * Get size
 *
 * @return total number of items in the sequence.
 */
public abstract int size();
}
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Equivalence function for a {@link org.eclipse.jgit.diff.Sequence} compared by
* difference algorithm.
*
* Difference algorithms can use a comparator to compare portions of two
* sequences and discover the minimal edits required to transform from one
* sequence to the other sequence.
*
* Indexes within a sequence are zero-based.
*
 */

/**
 * Index structure of lines/blocks in one file.
 *
 * This structure can be used to compute an approximation of the similarity
* between two files. The index is used by
* {@link org.eclipse.jgit.diff.SimilarityRenameDetector} to compute scores
* between files.
*
* To save space in memory, this index uses a space efficient encoding which
* will not exceed 1 MiB per instance. The index starts out at a smaller size
* (closer to 2 KiB), but may grow as more distinct blocks within the scanned
* file are discovered.
*
* @since 4.0
*/
public class SimilarityIndex {
/** A special {@link TableFullException} used in place of OutOfMemoryError. */
public static final TableFullException
TABLE_FULL_OUT_OF_MEMORY = new TableFullException();
/**
* Shift to apply before storing a key.
*
* Within the 64 bit table record space, we leave the highest bit unset so
	 * all values are positive. The lower 32 bits are used to count bytes.
*/
private static final int KEY_SHIFT = 32;
/** Maximum value of the count field, also mask to extract the count. */
private static final long MAX_COUNT = (1L << KEY_SHIFT) - 1;
/**
	 * Total number of bytes hashed into the structure, including \n. This is
	 * usually the size of the file minus the number of CRLF sequences, as the
	 * CR of a CRLF pair is not hashed.
*/
private long hashedCnt;
/** Number of non-zero entries in {@link #idHash}. */
private int idSize;
/** {@link #idSize} that triggers {@link #idHash} to double in size. */
private int idGrowAt;
/**
* Pairings of content keys and counters.
*
	 * Slots in the table are actually two ints wedged into a single long. The
	 * upper 32 bits store the content key, and the lower 32 bits store the
	 * number of bytes associated with that key. An empty slot is denoted by 0,
	 * which cannot collide with a real entry, as a real entry always has a
	 * non-zero count. Values can only be positive, which we enforce during key
	 * addition.
*/
private long[] idHash;
/** {@code idHash.length == 1 << idHashBits}. */
private int idHashBits;
/**
* Create a new similarity index for the given object
*
* @param obj
* the object to hash
* @return similarity index for this object
* @throws java.io.IOException
* file contents cannot be read from the repository.
* @throws org.eclipse.jgit.diff.SimilarityIndex.TableFullException
* object hashing overflowed the storage capacity of the
* SimilarityIndex.
*/
public static SimilarityIndex create(ObjectLoader obj) throws IOException,
TableFullException {
SimilarityIndex idx = new SimilarityIndex();
idx.hash(obj);
idx.sort();
return idx;
}
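	// Typical use (illustrative sketch, not from the original source): build an
	// index for each side of a candidate rename pair and compare them. Here
	// "reader", "oldId" and "newId" are assumed to exist in the caller:
	//
	//   SimilarityIndex src = SimilarityIndex.create(reader.open(oldId));
	//   SimilarityIndex dst = SimilarityIndex.create(reader.open(newId));
	//   int score = src.score(dst, 100); // 100 == a perfect match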
SimilarityIndex() {
idHashBits = 8;
idHash = new long[1 << idHashBits];
idGrowAt = growAt(idHashBits);
}
static boolean isBinary(ObjectLoader obj) throws IOException {
if (obj.isLarge()) {
try (ObjectStream in1 = obj.openStream()) {
return RawText.isBinary(in1);
}
}
byte[] raw = obj.getCachedBytes();
return RawText.isBinary(raw, raw.length, true);
}
void hash(ObjectLoader obj) throws MissingObjectException, IOException,
TableFullException {
if (obj.isLarge()) {
hashLargeObject(obj);
} else {
byte[] raw = obj.getCachedBytes();
hash(raw, 0, raw.length);
}
}
private void hashLargeObject(ObjectLoader obj) throws IOException,
TableFullException {
boolean text;
text = !isBinary(obj);
try (ObjectStream in2 = obj.openStream()) {
hash(in2, in2.getSize(), text);
}
}
void hash(byte[] raw, int ptr, int end) throws TableFullException {
final boolean text = !RawText.isBinary(raw, raw.length, true);
hashedCnt = 0;
while (ptr < end) {
int hash = 5381;
int blockHashedCnt = 0;
int start = ptr;
// Hash one line, or one block, whichever occurs first.
do {
int c = raw[ptr++] & 0xff;
// Ignore CR in CRLF sequence if text
if (text && c == '\r' && ptr < end && raw[ptr] == '\n')
continue;
blockHashedCnt++;
if (c == '\n')
break;
hash = (hash << 5) + hash + c;
} while (ptr < end && ptr - start < 64);
hashedCnt += blockHashedCnt;
add(hash, blockHashedCnt);
}
}
void hash(InputStream in, long remaining, boolean text) throws IOException,
TableFullException {
byte[] buf = new byte[4096];
int ptr = 0;
int cnt = 0;
while (0 < remaining) {
int hash = 5381;
int blockHashedCnt = 0;
// Hash one line, or one block, whichever occurs first.
int n = 0;
do {
if (ptr == cnt) {
ptr = 0;
cnt = in.read(buf, 0, buf.length);
if (cnt <= 0)
throw new EOFException();
}
n++;
int c = buf[ptr++] & 0xff;
// Ignore CR in CRLF sequence if text
if (text && c == '\r' && ptr < cnt && buf[ptr] == '\n')
continue;
blockHashedCnt++;
if (c == '\n')
break;
hash = (hash << 5) + hash + c;
} while (n < 64 && n < remaining);
hashedCnt += blockHashedCnt;
add(hash, blockHashedCnt);
remaining -= n;
}
}
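	// Both hash(byte[], ...) and hash(InputStream, ...) above apply the same
	// djb2-style rolling hash to one line (or at most a 64-byte block) at a
	// time, so a large blob scored via streaming is indexed consistently with a
	// small blob scored from its cached bytes.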
/**
* Sort the internal table so it can be used for efficient scoring.
*
* Once sorted, additional lines/blocks cannot be added to the index.
*/
void sort() {
// Sort the array. All of the empty space will wind up at the front,
// because we forced all of the keys to always be positive. Later
// we only work with the back half of the array.
//
Arrays.sort(idHash);
}
/**
* Compute the similarity score between this index and another.
*
* A region of a file is defined as a line in a text file or a fixed-size
* block in a binary file. To prepare an index, each region in the file is
* hashed; the values and counts of hashes are retained in a sorted table.
* Define the similarity fraction F as the count of matching regions
	 * between the two files divided by the maximum count of regions in
* either file. The similarity score is F multiplied by the maxScore
* constant, yielding a range [0, maxScore]. It is defined as maxScore for
* the degenerate case of two empty files.
*
* The similarity score is symmetrical; i.e. a.score(b) == b.score(a).
*
* @param dst
* the other index
* @param maxScore
* the score representing a 100% match
* @return the similarity score
*/
public int score(SimilarityIndex dst, int maxScore) {
long max = Math.max(hashedCnt, dst.hashedCnt);
if (max == 0)
return maxScore;
return (int) ((common(dst) * maxScore) / max);
}
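	// Worked example (illustrative): if this index hashed 120 bytes, dst hashed
	// 100 bytes, and common(dst) reports 90 matching bytes, then
	// score(dst, 100) == (90 * 100) / 120 == 75, i.e. a 75% similarity score.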
long common(SimilarityIndex dst) {
return common(this, dst);
}
private static long common(SimilarityIndex src, SimilarityIndex dst) {
int srcIdx = src.packedIndex(0);
int dstIdx = dst.packedIndex(0);
long[] srcHash = src.idHash;
long[] dstHash = dst.idHash;
return common(srcHash, srcIdx, dstHash, dstIdx);
}
private static long common(long[] srcHash, int srcIdx, //
long[] dstHash, int dstIdx) {
if (srcIdx == srcHash.length || dstIdx == dstHash.length)
return 0;
long common = 0;
int srcKey = keyOf(srcHash[srcIdx]);
int dstKey = keyOf(dstHash[dstIdx]);
for (;;) {
if (srcKey == dstKey) {
common += Math.min(countOf(srcHash[srcIdx]),
countOf(dstHash[dstIdx]));
if (++srcIdx == srcHash.length)
break;
srcKey = keyOf(srcHash[srcIdx]);
if (++dstIdx == dstHash.length)
break;
dstKey = keyOf(dstHash[dstIdx]);
} else if (srcKey < dstKey) {
// Regions of src which do not appear in dst.
if (++srcIdx == srcHash.length)
break;
srcKey = keyOf(srcHash[srcIdx]);
} else /* if (dstKey < srcKey) */{
// Regions of dst which do not appear in src.
if (++dstIdx == dstHash.length)
break;
dstKey = keyOf(dstHash[dstIdx]);
}
}
return common;
}
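	// The loop above is a classic sorted-merge intersection: both idHash arrays
	// have been sorted by Arrays.sort(), keys live in the high bits, and the
	// side holding the smaller key is advanced, so the scan is linear in the
	// total number of occupied slots.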
// Testing only
int size() {
return idSize;
}
// Testing only
int key(int idx) {
return keyOf(idHash[packedIndex(idx)]);
}
// Testing only
long count(int idx) {
return countOf(idHash[packedIndex(idx)]);
}
// Brute force approach only for testing.
int findIndex(int key) {
for (int i = 0; i < idSize; i++)
if (key(i) == key)
return i;
return -1;
}
private int packedIndex(int idx) {
return (idHash.length - idSize) + idx;
}
void add(int key, int cnt) throws TableFullException {
key = (key * 0x9e370001) >>> 1; // Mix bits and ensure not negative.
int j = slot(key);
for (;;) {
long v = idHash[j];
if (v == 0) {
// Empty slot in the table, store here.
if (idGrowAt <= idSize) {
grow();
j = slot(key);
continue;
}
idHash[j] = pair(key, cnt);
idSize++;
return;
} else if (keyOf(v) == key) {
// Same key, increment the counter. If it overflows, fail
// indexing to prevent the key from being impacted.
//
idHash[j] = pair(key, countOf(v) + cnt);
return;
} else if (++j >= idHash.length) {
j = 0;
}
}
}
private static long pair(int key, long cnt) throws TableFullException {
if (MAX_COUNT < cnt)
throw new TableFullException();
return (((long) key) << KEY_SHIFT) | cnt;
}
private int slot(int key) {
// We use 31 - idHashBits because the upper bit was already forced
// to be 0 and we want the remaining high bits to be used as the
// table slot.
//
return key >>> (31 - idHashBits);
}
private static int growAt(int idHashBits) {
return (1 << idHashBits) * (idHashBits - 3) / idHashBits;
}
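	// For example, growAt(8) == 256 * (8 - 3) / 8 == 160, so the initial
	// 256-slot table is doubled once it is roughly 62% full.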
@SuppressWarnings("UnusedException")
private void grow() throws TableFullException {
if (idHashBits == 30)
throw new TableFullException();
long[] oldHash = idHash;
int oldSize = idHash.length;
idHashBits++;
idGrowAt = growAt(idHashBits);
try {
idHash = new long[1 << idHashBits];
} catch (OutOfMemoryError noMemory) {
throw TABLE_FULL_OUT_OF_MEMORY;
}
for (int i = 0; i < oldSize; i++) {
long v = oldHash[i];
if (v != 0) {
int j = slot(keyOf(v));
while (idHash[j] != 0)
if (++j >= idHash.length)
j = 0;
idHash[j] = v;
}
}
}
private static int keyOf(long v) {
return (int) (v >>> KEY_SHIFT);
}
private static long countOf(long v) {
return v & MAX_COUNT;
}
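	// Packing example (illustrative): pair(3, 5) == 0x0000000300000005L, and
	// keyOf/countOf recover 3 and 5 respectively; pair() throws
	// TableFullException once a count no longer fits in the low 32 bits.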
/** Thrown by {@code create()} when file is too large. */
public static class TableFullException extends Exception {
private static final long serialVersionUID = 1L;
}
}
// File: SimilarityRenameDetector.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import org.eclipse.jgit.api.errors.CanceledException;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ProgressMonitor;
class SimilarityRenameDetector {
/**
* Number of bits we need to express an index into src or dst list.
*
* This must be 28, giving us a limit of 2^28 entries in either list, which
	 * is an insane limit of 268,435,456 file names being considered in a single
* rename pass. The other 8 bits are used to store the score, while staying
* under 127 so the long doesn't go negative.
*/
private static final int BITS_PER_INDEX = 28;
private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1;
private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX;
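	// Bit budget (as described above): 8 score bits + 28 bits for one list
	// index + 28 bits for the other == 64 bits, with the score kept below 128
	// so the packed long never has its sign bit set.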
private ContentSource.Pair reader;
/**
* All sources to consider for copies or renames.
*
* A source is typically a {@link ChangeType#DELETE} change, but could be
* another type when trying to perform copy detection concurrently with
* rename detection.
*/
	private List<DiffEntry> srcs;

	/**
	 * All destinations to consider looking for a rename.
	 *
* A destination is typically an {@link ChangeType#ADD}, as the name has
* just come into existence, and we want to discover where its initial
* content came from.
*/
	private List<DiffEntry> dsts;

	/**
	 * Matrix of all examined file pairs, and their scores.
	 *
	 * The upper 8 bits of each long store the score, but the score is bounded
	 * to be in the range (0, 128) so that the highest bit is never set, and all
* entries are therefore positive.
*
* List indexes to an element of {@link #srcs} and {@link #dsts} are encoded
* as the lower two groups of 28 bits, respectively, but the encoding is
* inverted, so that 0 is expressed as {@code (1 << 28) - 1}. This sorts
* lower list indices later in the matrix, giving precedence to files whose
* names sort earlier in the tree.
*/
private long[] matrix;
/** Score a pair must exceed to be considered a rename. */
private int renameScore = 50;
/**
* File size threshold (in bytes) for detecting renames. Files larger
* than this size will not be processed for renames.
*/
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
/** Skip content renames for binary files. */
private boolean skipBinaryFiles = false;
/** Set if any {@link SimilarityIndex.TableFullException} occurs. */
private boolean tableOverflow;
	private List<DiffEntry> out;

	/**
	 * Get the old name associated with this file.
	 *
* @return old name for this file.
*/
public String getOldPath() {
return oldPath;
}
/**
* Get the new name associated with this file.
	 *
	 * For a deleted file the new name is <code>/dev/null</code>.
	 *
* @return new name for this file.
*/
public String getNewPath() {
return newPath;
}
/**
* Get the path associated with this file.
*
* @param side
* which path to obtain.
* @return name for this file.
*/
public String getPath(Side side) {
return side == Side.OLD ? getOldPath() : getNewPath();
}
/**
* Get diff attribute
*
* @return the {@link Attribute} determining filters to be applied.
* @since 4.11
*/
public Attribute getDiffAttribute() {
return diffAttribute;
}
/**
* Get the old file mode
*
* @return the old file mode, if described in the patch
*/
public FileMode getOldMode() {
return oldMode;
}
/**
* Get the new file mode
*
* @return the new file mode, if described in the patch
*/
public FileMode getNewMode() {
return newMode;
}
/**
* Get the mode associated with this file.
*
* @param side
* which mode to obtain.
* @return the mode.
*/
public FileMode getMode(Side side) {
return side == Side.OLD ? getOldMode() : getNewMode();
}
/**
* Get the change type
*
* @return the type of change this patch makes on {@link #getNewPath()}
*/
public ChangeType getChangeType() {
return changeType;
}
/**
* Get similarity score
*
* @return similarity score between {@link #getOldPath()} and
* {@link #getNewPath()} if {@link #getChangeType()} is
* {@link org.eclipse.jgit.diff.DiffEntry.ChangeType#COPY} or
* {@link org.eclipse.jgit.diff.DiffEntry.ChangeType#RENAME}.
*/
public int getScore() {
return score;
}
/**
	 * Get the old object id from the index.
*
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getOldId() {
return oldId;
}
/**
	 * Get the new object id from the index.
*
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getNewId() {
return newId;
}
/**
* Whether the mark tree filter with the specified index matched during scan
* or not, see {@link #scan(TreeWalk, boolean, TreeFilter...)}. Example:
*
*
* TreeFilter filterA = ...;
* TreeFilter filterB = ...;
* List<DiffEntry> entries = DiffEntry.scan(walk, false, filterA, filterB);
* DiffEntry entry = entries.get(0);
* boolean filterAMatched = entry.isMarked(0);
* boolean filterBMatched = entry.isMarked(1);
 *
 */

/**
 * A modified region detected between two versions of roughly the same content.
 *
 * An edit where {@code beginA == endA && beginB < endB} is an insert edit,
 * that is sequence B inserted the elements in region {@code [beginB, endB)}
 * at {@code beginA}.
 *
 * An edit where {@code beginA < endA && beginB == endB} is a delete edit,
 * that is sequence B has removed the elements between {@code [beginA, endA)}.
 *
 * An edit where {@code beginA < endA && beginB < endB} is a replace edit,
 * that is sequence B has replaced the range of elements between
 * {@code [beginA, endA)} with those found in {@code [beginB, endB)}.
 */
public class Edit {
/** Type of edit */
public enum Type {
/** Sequence B has inserted the region. */
INSERT,
/** Sequence B has removed the region. */
DELETE,
/** Sequence B has replaced the region with different content. */
REPLACE,
/** Sequence A and B have zero length, describing nothing. */
EMPTY;
}
int beginA;
int endA;
int beginB;
int endB;
/**
* Create a new empty edit.
*
* @param as
* beginA: start and end of region in sequence A; 0 based.
* @param bs
* beginB: start and end of region in sequence B; 0 based.
*/
public Edit(int as, int bs) {
this(as, as, bs, bs);
}
/**
* Create a new edit.
*
* @param as
* beginA: start of region in sequence A; 0 based.
* @param ae
* endA: end of region in sequence A; must be >= as.
* @param bs
* beginB: start of region in sequence B; 0 based.
* @param be
	 *            endB: end of region in sequence B; must be >= bs.
*/
public Edit(int as, int ae, int bs, int be) {
beginA = as;
endA = ae;
beginB = bs;
endB = be;
}
/**
* Get type
*
* @return the type of this region
*/
public final Type getType() {
if (beginA < endA) {
if (beginB < endB) {
return Type.REPLACE;
}
return Type.DELETE;
}
if (beginB < endB) {
return Type.INSERT;
}
		// beginB == endB
return Type.EMPTY;
}
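	// Examples: new Edit(1, 3, 5, 5) is a DELETE (A shrank, B unchanged),
	// new Edit(2, 2, 5, 7) is an INSERT, new Edit(1, 3, 5, 7) is a REPLACE,
	// and new Edit(4, 4) is EMPTY.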
/**
* Whether edit is empty
*
* @return {@code true} if the edit is empty (lengths of both a and b is
* zero)
*/
public final boolean isEmpty() {
return beginA == endA && beginB == endB;
}
/**
* Get start point in sequence A
*
* @return start point in sequence A
*/
public final int getBeginA() {
return beginA;
}
/**
* Get end point in sequence A
*
* @return end point in sequence A
*/
public final int getEndA() {
return endA;
}
/**
* Get start point in sequence B
*
* @return start point in sequence B
*/
public final int getBeginB() {
return beginB;
}
/**
* Get end point in sequence B
*
* @return end point in sequence B
*/
public final int getEndB() {
return endB;
}
/**
* Get length of the region in A
*
* @return length of the region in A
*/
public final int getLengthA() {
return endA - beginA;
}
/**
* Get length of the region in B
*
	 * @return length of the region in B
*/
public final int getLengthB() {
return endB - beginB;
}
/**
* Move the edit region by the specified amount.
*
* @param amount
* the region is shifted by this amount, and can be positive or
* negative.
* @since 4.8
*/
public final void shift(int amount) {
beginA += amount;
endA += amount;
beginB += amount;
endB += amount;
}
/**
* Construct a new edit representing the region before cut.
*
* @param cut
* the cut point. The beginning A and B points are used as the
* end points of the returned edit.
* @return an edit representing the slice of {@code this} edit that occurs
* before {@code cut} starts.
*/
public final Edit before(Edit cut) {
return new Edit(beginA, cut.beginA, beginB, cut.beginB);
}
/**
* Construct a new edit representing the region after cut.
*
* @param cut
* the cut point. The ending A and B points are used as the
* starting points of the returned edit.
* @return an edit representing the slice of {@code this} edit that occurs
* after {@code cut} ends.
*/
public final Edit after(Edit cut) {
return new Edit(cut.endA, endA, cut.endB, endB);
}
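	// Example: for this == Edit(10, 20, 30, 40) and cut == Edit(12, 14, 32, 34),
	// before(cut) yields Edit(10, 12, 30, 32) and after(cut) yields
	// Edit(14, 20, 34, 40).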
/**
* Increase {@link #getEndA()} by 1.
*/
public void extendA() {
endA++;
}
/**
* Increase {@link #getEndB()} by 1.
*/
public void extendB() {
endB++;
}
/**
* Swap A and B, so the edit goes the other direction.
*/
public void swap() {
final int sBegin = beginA;
final int sEnd = endA;
beginA = beginB;
endA = endB;
beginB = sBegin;
endB = sEnd;
}
@Override
public int hashCode() {
return beginA ^ endA;
}
@Override
public boolean equals(Object o) {
if (o instanceof Edit) {
final Edit e = (Edit) o;
return this.beginA == e.beginA && this.endA == e.endA
&& this.beginB == e.beginB && this.endB == e.endB;
}
return false;
}
@SuppressWarnings("nls")
@Override
public String toString() {
final Type t = getType();
return t + "(" + beginA + "-" + endA + "," + beginB + "-" + endB + ")";
}
}
// File: EditList.java
/*
* Copyright (C) 2009, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import java.util.ArrayList;
/**
* Specialized list of {@link org.eclipse.jgit.diff.Edit}s in a document.
*/
public class EditList extends ArrayList<Edit>

/**
 * @param <S>
 *            the base sequence type.
 */
public final class HashedSequence<S extends Sequence> extends Sequence {
final S base;
final int[] hashes;
HashedSequence(S base, int[] hashes) {
this.base = base;
this.hashes = hashes;
}
@Override
public int size() {
return base.size();
}
}
// File: HashedSequenceComparator.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Wrap another comparator for use with
* {@link org.eclipse.jgit.diff.HashedSequence}.
*
 * @param <S>
 *            the base sequence type.
 */
public final class HashedSequenceComparator<S extends Sequence> extends
		SequenceComparator<HashedSequence<S>> {
	private final SequenceComparator<? super S> cmp;
	@Override
	public boolean equals(HashedSequence<S> a, int ai, //
			HashedSequence<S> b, int bi) {
return a.hashes[ai] == b.hashes[bi]
&& cmp.equals(a.base, ai, b.base, bi);
}
@Override
	public int hash(HashedSequence<S> seq, int ptr) {
return seq.hashes[ptr];
}
}
// File: HashedSequencePair.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Wraps two {@link org.eclipse.jgit.diff.Sequence} instances to cache their
* element hash codes.
 *
 * @param <S>
 *            the base sequence type.
 */
public class HashedSequencePair<S extends Sequence> {
	private final SequenceComparator<? super S> cmp;
	private final S baseA;
	private final S baseB;
	private HashedSequence<S> cachedA;
	private HashedSequence<S> cachedB;
/**
* Construct a pair to provide fast hash codes.
*
* @param cmp
* the base comparator for the sequence elements.
* @param a
* the A sequence.
* @param b
* the B sequence.
*/
	public HashedSequencePair(SequenceComparator<? super S> cmp, S a, S b) {
this.cmp = cmp;
this.baseA = a;
this.baseB = b;
}
/**
* Get comparator
*
* @return obtain a comparator that uses the cached hash codes
*/
	public HashedSequenceComparator<S> getComparator() {
return new HashedSequenceComparator<>(cmp);
}
/**
* Get A
*
* @return wrapper around A that includes cached hash codes
*/
	public HashedSequence<S> getA() {
if (cachedA == null)
cachedA = wrap(baseA);
return cachedA;
}
/**
* Get B
*
* @return wrapper around B that includes cached hash codes
*/
	public HashedSequence<S> getB() {
if (cachedB == null)
cachedB = wrap(baseB);
return cachedB;
}
	private HashedSequence<S> wrap(S base) {
final int end = base.size();
final int[] hashes = new int[end];
for (int ptr = 0; ptr < end; ptr++)
hashes[ptr] = cmp.hash(base, ptr);
return new HashedSequence<>(base, hashes);
}
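	// wrap() pays the hashing cost exactly once per element; the diff algorithm
	// can then reject most non-matches by comparing the cached ints in
	// HashedSequenceComparator before falling back to the underlying equals().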
}
// File: HistogramDiff.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import java.util.ArrayList;
import java.util.List;
/**
* An extended form of Bram Cohen's patience diff algorithm.
 */
public class HistogramDiff extends LowLevelDiffAlgorithm {
	@Override
	public <S extends Sequence> void diffNonCommon(EditList edits,
			HashedSequenceComparator<S> cmp, HashedSequence<S> a,
			HashedSequence<S> b, Edit region) {
new State<>(edits, cmp, a, b).diffRegion(region);
}
	private class State<S extends Sequence> {
		private final HashedSequenceComparator<S> cmp;
		private final HashedSequence<S> a;
		private final HashedSequence<S> b;
		private final List<Edit> queue = new ArrayList<>();
		final EditList edits;
		State(EditList edits, HashedSequenceComparator<S> cmp,
				HashedSequence<S> a, HashedSequence<S> b) {
this.cmp = cmp;
this.a = a;
this.b = b;
this.edits = edits;
}
void diffRegion(Edit r) {
diffReplace(r);
while (!queue.isEmpty())
diff(queue.remove(queue.size() - 1));
}
private void diffReplace(Edit r) {
Edit lcs = new HistogramDiffIndex<>(maxChainLength, cmp, a, b, r)
.findLongestCommonSequence();
if (lcs != null) {
// If we were given an edit, we can prove a result here.
//
if (lcs.isEmpty()) {
// An empty edit indicates there is nothing in common.
// Replace the entire region.
//
edits.add(r);
} else {
queue.add(r.after(lcs));
queue.add(r.before(lcs));
}
} else if (fallback instanceof LowLevelDiffAlgorithm) {
LowLevelDiffAlgorithm fb = (LowLevelDiffAlgorithm) fallback;
fb.diffNonCommon(edits, cmp, a, b, r);
} else if (fallback != null) {
				SubsequenceComparator<HashedSequence<S>> cs = new SubsequenceComparator<>(cmp);

/**
 * @param <S>
 *            type of the base sequence.
 */
final class HistogramDiffIndex<S extends Sequence> {
private static final int REC_NEXT_SHIFT = 28 + 8;
private static final int REC_PTR_SHIFT = 8;
private static final int REC_PTR_MASK = (1 << 28) - 1;
private static final int REC_CNT_MASK = (1 << 8) - 1;
private static final int MAX_PTR = REC_PTR_MASK;
private static final int MAX_CNT = (1 << 8) - 1;
private final int maxChainLength;
	private final HashedSequenceComparator<S> cmp;
	private final HashedSequence<S> a;
	private final HashedSequence<S> b;
private final Edit region;
/** Keyed by {@link #hash(HashedSequence, int)} for {@link #recs} index. */
private final int[] table;
/** Number of low bits to discard from a key to index {@link #table}. */
private final int keyShift;
/**
* Describes a unique element in sequence A.
*
	 * The records in this table are actually 3-tuples of:
	 *
	 * 1. index of the next record in this table that has the same hash code
	 * 2. index of the first element of this occurrence chain within sequence A
	 * 3. number of occurrences of this element within the region of A
	 *
* The occurrence count is capped at {@link #MAX_CNT}, as the field is only
* a few bits wide. Elements that occur more frequently will have their
* count capped.
*/
private long[] recs;
/** Number of elements in {@link #recs}; also is the unique element count. */
private int recCnt;
/**
* For {@code ptr}, {@code next[ptr - ptrShift]} has subsequent index.
*
* For the sequence element {@code ptr}, the value stored at location
* {@code next[ptr - ptrShift]} is the next occurrence of the exact same
* element in the sequence.
*
* Chains always run from the lowest index to the largest index. Therefore
* the array will store {@code next[1] = 2}, but never {@code next[2] = 1}.
* This allows a chain to terminate with {@code 0}, as {@code 0} would never
* be a valid next element.
*
* The array is sized to be {@code region.getLengthA()} and element indexes
* are converted to array indexes by subtracting {@link #ptrShift}, which is
* just a cached version of {@code region.beginA}.
*/
private int[] next;
/**
* For element {@code ptr} in A, index of the record in {@link #recs} array.
*
* The record at {@code recs[recIdx[ptr - ptrShift]]} is the record
* describing all occurrences of the element appearing in sequence A at
* position {@code ptr}. The record is needed to get the occurrence count of
* the element, or to locate all other occurrences of that element within
* sequence A. This index provides constant-time access to the record, and
* avoids needing to scan the hash chain.
*/
private int[] recIdx;
/** Value to subtract from element indexes to key {@link #next} array. */
private int ptrShift;
private Edit lcs;
private int cnt;
private boolean hasCommon;
	HistogramDiffIndex(int maxChainLength, HashedSequenceComparator<S> cmp,
			HashedSequence<S> a, HashedSequence<S> b, Edit r) {
this.maxChainLength = maxChainLength;
this.cmp = cmp;
this.a = a;
this.b = b;
this.region = r;
if (region.endA >= MAX_PTR)
throw new IllegalArgumentException(
JGitText.get().sequenceTooLargeForDiffAlgorithm);
final int sz = r.getLengthA();
final int tableBits = tableBits(sz);
table = new int[1 << tableBits];
keyShift = 32 - tableBits;
ptrShift = r.beginA;
recs = new long[Math.max(4, sz >>> 3)];
next = new int[sz];
recIdx = new int[sz];
}
Edit findLongestCommonSequence() {
if (!scanA())
return null;
lcs = new Edit(0, 0);
cnt = maxChainLength + 1;
for (int bPtr = region.beginB; bPtr < region.endB;)
bPtr = tryLongestCommonSequence(bPtr);
return hasCommon && maxChainLength < cnt ? null : lcs;
}
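	// A null result above tells HistogramDiff to fall back to another
	// algorithm: there is common content, but every match found involved
	// elements occurring too frequently (beyond maxChainLength) for the
	// histogram heuristic to pick a safe split point.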
private boolean scanA() {
// Scan the elements backwards, inserting them into the hash table
// as we go. Going in reverse places the earliest occurrence of any
// element at the start of the chain, so we consider earlier matches
// before later matches.
//
SCAN: for (int ptr = region.endA - 1; region.beginA <= ptr; ptr--) {
final int tIdx = hash(a, ptr);
int chainLen = 0;
for (int rIdx = table[tIdx]; rIdx != 0;) {
final long rec = recs[rIdx];
if (cmp.equals(a, recPtr(rec), a, ptr)) {
// ptr is identical to another element. Insert it onto
// the front of the existing element chain.
//
int newCnt = recCnt(rec) + 1;
if (MAX_CNT < newCnt)
newCnt = MAX_CNT;
recs[rIdx] = recCreate(recNext(rec), ptr, newCnt);
next[ptr - ptrShift] = recPtr(rec);
recIdx[ptr - ptrShift] = rIdx;
continue SCAN;
}
rIdx = recNext(rec);
chainLen++;
}
if (chainLen == maxChainLength)
return false;
// This is the first time we have ever seen this particular
// element in the sequence. Construct a new chain for it.
//
final int rIdx = ++recCnt;
if (rIdx == recs.length) {
int sz = Math.min(recs.length << 1, 1 + region.getLengthA());
long[] n = new long[sz];
System.arraycopy(recs, 0, n, 0, recs.length);
recs = n;
}
recs[rIdx] = recCreate(table[tIdx], ptr, 1);
recIdx[ptr - ptrShift] = rIdx;
table[tIdx] = rIdx;
}
return true;
}
private int tryLongestCommonSequence(int bPtr) {
int bNext = bPtr + 1;
int rIdx = table[hash(b, bPtr)];
for (long rec; rIdx != 0; rIdx = recNext(rec)) {
rec = recs[rIdx];
// If there are more occurrences in A, don't use this chain.
if (recCnt(rec) > cnt) {
if (!hasCommon)
hasCommon = cmp.equals(a, recPtr(rec), b, bPtr);
continue;
}
int as = recPtr(rec);
if (!cmp.equals(a, as, b, bPtr))
continue;
hasCommon = true;
TRY_LOCATIONS: for (;;) {
int np = next[as - ptrShift];
int bs = bPtr;
int ae = as + 1;
int be = bs + 1;
int rc = recCnt(rec);
while (region.beginA < as && region.beginB < bs
&& cmp.equals(a, as - 1, b, bs - 1)) {
as--;
bs--;
if (1 < rc)
rc = Math.min(rc, recCnt(recs[recIdx[as - ptrShift]]));
}
while (ae < region.endA && be < region.endB
&& cmp.equals(a, ae, b, be)) {
if (1 < rc)
rc = Math.min(rc, recCnt(recs[recIdx[ae - ptrShift]]));
ae++;
be++;
}
if (bNext < be)
bNext = be;
if (lcs.getLengthA() < ae - as || rc < cnt) {
// If this region is the longest, or there are less
// occurrences of it in A, its now our LCS.
//
lcs.beginA = as;
lcs.beginB = bs;
lcs.endA = ae;
lcs.endB = be;
cnt = rc;
}
// Because we added elements in reverse order index 0
// cannot possibly be the next position. Its the first
// element of the sequence and thus would have been the
// value of as at the start of the TRY_LOCATIONS loop.
//
if (np == 0)
break TRY_LOCATIONS;
while (np < ae) {
// The next location to consider was actually within
// the LCS we examined above. Don't reconsider it.
//
np = next[np - ptrShift];
if (np == 0)
break TRY_LOCATIONS;
}
as = np;
}
}
return bNext;
}
	private int hash(HashedSequence<S> s, int idx) {
return (cmp.hash(s, idx) * 0x9e370001 /* mix bits */) >>> keyShift;
}
private static long recCreate(int next, int ptr, int cnt) {
return ((long) next << REC_NEXT_SHIFT) //
| ((long) ptr << REC_PTR_SHIFT) //
| cnt;
}
private static int recNext(long rec) {
return (int) (rec >>> REC_NEXT_SHIFT);
}
private static int recPtr(long rec) {
return ((int) (rec >>> REC_PTR_SHIFT)) & REC_PTR_MASK;
}
private static int recCnt(long rec) {
return ((int) rec) & REC_CNT_MASK;
}
private static int tableBits(int sz) {
int bits = 31 - Integer.numberOfLeadingZeros(sz);
if (bits == 0)
bits = 1;
if (1 << bits < sz)
bits++;
return bits;
}
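	// Example: tableBits(100) == 7, because 2^6 == 64 is still smaller than the
	// region size while 2^7 == 128 is not, giving a table at least as large as
	// the number of elements being indexed.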
}
// File: LowLevelDiffAlgorithm.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Compares two sequences primarily based upon hash codes.
*/
public abstract class LowLevelDiffAlgorithm extends DiffAlgorithm {
@Override
	public <S extends Sequence> EditList diffNonCommon(
			SequenceComparator<? super S> cmp, S a, S b) {
		HashedSequencePair<S> p = new HashedSequencePair<>(cmp, a, b);
		HashedSequenceComparator<S> hc = p.getComparator();
		HashedSequence<S> ha = p.getA();
		HashedSequence<S> hb = p.getB();
p = null;
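		// Presumably nulled so the pair wrapper can be garbage collected early;
		// only hc, ha and hb are used from here on.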
EditList res = new EditList();
Edit region = new Edit(0, a.size(), 0, b.size());
diffNonCommon(res, hc, ha, hb, region);
return res;
}
/**
* Compare two sequences and identify a list of edits between them.
*
* This method should be invoked only after the two sequences have been
* proven to have no common starting or ending elements. The expected
* elimination of common starting and ending elements is automatically
* performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
* method, which invokes this method using
* {@link org.eclipse.jgit.diff.Subsequence}s.
*
	 * @param <S>
* type of Sequence compared
* @param edits
* result list to append the region's edits onto.
* @param cmp
* the comparator supplying the element equivalence function.
* @param a
* the first (also known as old or pre-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
* {@link org.eclipse.jgit.diff.Edit#getEndA()}.
* @param b
* the second (also known as new or post-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
* {@link org.eclipse.jgit.diff.Edit#getEndB()}.
* @param region
* the region being compared within the two sequences.
*/
	public abstract <S extends Sequence> void diffNonCommon(EditList edits,
			HashedSequenceComparator<S> cmp, HashedSequence<S> a,
			HashedSequence<S> b, Edit region);
}
// File: MyersDiff.java
/*
 * Copyright (C) 2008-2009, Johannes E. Schindelin and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
package org.eclipse.jgit.diff;
/**
 * Diff algorithm, based on "An O(ND) Difference Algorithm and its Variations",
 * by Eugene Myers.
 *
 * The total cost over the recursive bisection of the problem works out to
 * O(N * D^2 + 2 * N/2 * (D/2)^2 + 4 * N/4 * (D/4)^2 + ...)
 * = O(N * D^2 * 5 / 4) = O(N * D^2).
 *
 * @param <S>
 *            type of sequence.
*/
@SuppressWarnings("hiding")
public class MyersDiff<S extends Sequence> {
/** Singleton instance of MyersDiff. */
public static final DiffAlgorithm INSTANCE = new LowLevelDiffAlgorithm() {
@SuppressWarnings("unused")
@Override
		public <S extends Sequence> void diffNonCommon(EditList edits,
				HashedSequenceComparator<S> cmp, HashedSequence<S> a,
				HashedSequence<S> b, Edit region) {
new MyersDiff<>(edits, cmp, a, b, region);
}
};
/**
* The list of edits found during the last call to
* {@link #calculateEdits(Edit)}
*/
protected EditList edits;
/** Comparison function for sequences. */
	protected HashedSequenceComparator<S> cmp;
/**
* The first text to be compared. Referred to as "Text A" in the comments
*/
	protected HashedSequence<S> a;
/**
* The second text to be compared. Referred to as "Text B" in the comments
*/
	protected HashedSequence<S> b;
	private MyersDiff(EditList edits, HashedSequenceComparator<S> cmp,
			HashedSequence<S> a, HashedSequence<S> b, Edit region) {
this.edits = edits;
this.cmp = cmp;
this.a = a;
this.b = b;
calculateEdits(region);
}
// TODO: use ThreadLocal for future multi-threaded operations
MiddleEdit middle = new MiddleEdit();
/**
	 * Entry point into the algorithm this class is all about. This method
	 * triggers the calculation of the differences between A and B, expressed
	 * as a list of edits.
* @param r portion of the sequences to examine.
*/
private void calculateEdits(Edit r) {
middle.initialize(r.beginA, r.endA, r.beginB, r.endB);
if (middle.beginA >= middle.endA &&
middle.beginB >= middle.endB)
return;
calculateEdits(middle.beginA, middle.endA,
middle.beginB, middle.endB);
}
/**
* Calculates the differences between a given part of A against another
* given part of B
*
* @param beginA
* start of the part of A which should be compared
* (0<=beginA<sizeof(A))
* @param endA
* end of the part of A which should be compared
* (beginA<=endA<sizeof(A))
* @param beginB
* start of the part of B which should be compared
* (0<=beginB<sizeof(B))
* @param endB
* end of the part of B which should be compared
* (beginB<=endB<sizeof(B))
*/
protected void calculateEdits(int beginA, int endA,
int beginB, int endB) {
Edit edit = middle.calculate(beginA, endA, beginB, endB);
if (beginA < edit.beginA || beginB < edit.beginB) {
int k = edit.beginB - edit.beginA;
int x = middle.backward.snake(k, edit.beginA);
calculateEdits(beginA, x, beginB, k + x);
}
if (edit.getType() != Edit.Type.EMPTY)
edits.add(edits.size(), edit);
// after middle
if (endA > edit.endA || endB > edit.endB) {
int k = edit.endB - edit.endA;
int x = middle.forward.snake(k, edit.endA);
calculateEdits(x, endA, k + x, endB);
}
}
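	// The recursion above is the divide and conquer shape of Myers' bisection
	// approach: find the middle edit, recurse on the region before it, record
	// the edit, then recurse on the region after it.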
/**
* A class to help bisecting the sequences a and b to find minimal
* edit paths.
*
* As the arrays are reused for space efficiency, you will need one
* instance per thread.
*
* The entry function is the calculate() method.
*/
class MiddleEdit {
void initialize(int beginA, int endA, int beginB, int endB) {
this.beginA = beginA; this.endA = endA;
this.beginB = beginB; this.endB = endB;
// strip common parts on either end
int k = beginB - beginA;
this.beginA = forward.snake(k, beginA);
this.beginB = k + this.beginA;
k = endB - endA;
this.endA = backward.snake(k, endA);
this.endB = k + this.endA;
}
/*
* This function calculates the "middle" Edit of the shortest
* edit path between the given subsequences of a and b.
*
* Once a forward path and a backward path meet, we found the
* middle part. From the last snake end point on both of them,
* we construct the Edit.
*
* It is assumed that there is at least one edit in the range.
*/
// TODO: measure speed impact when this is synchronized
Edit calculate(int beginA, int endA, int beginB, int endB) {
if (beginA == endA || beginB == endB)
return new Edit(beginA, endA, beginB, endB);
this.beginA = beginA; this.endA = endA;
this.beginB = beginB; this.endB = endB;
/*
* Following the conventions in Myers' paper, "k" is
* the difference between the index into "b" and the
* index into "a".
*/
int minK = beginB - endA;
int maxK = endB - beginA;
forward.initialize(beginB - beginA, beginA, minK, maxK);
backward.initialize(endB - endA, endA, minK, maxK);
for (int d = 1; ; d++)
if (forward.calculate(d) ||
backward.calculate(d))
return edit;
}
/*
* For each d, we need to hold the d-paths for the diagonals
* k = -d, -d + 2, ..., d - 2, d. These are stored in the
* forward (and backward) array.
*
* As we allow subsequences, too, this needs some refinement:
* the forward paths start on the diagonal forwardK =
* beginB - beginA, and backward paths start on the diagonal
* backwardK = endB - endA.
*
* So, we need to hold the forward d-paths for the diagonals
* k = forwardK - d, forwardK - d + 2, ..., forwardK + d and
* the analogue for the backward d-paths. This means that
* we can turn (k, d) into the forward array index using this
* formula:
*
* i = (d + k - forwardK) / 2
*
* There is a further complication: the edit paths should not
* leave the specified subsequences, so k is bounded by
* minK = beginB - endA and maxK = endB - beginA. However,
* (k - forwardK) _must_ be odd whenever d is odd, and it
* _must_ be even when d is even.
*
* The values in the "forward" and "backward" arrays are
* positions ("x") in the sequence a, to get the corresponding
* positions ("y") in the sequence b, you have to calculate
* the appropriate k and then y:
*
* k = forwardK - d + i * 2
* y = k + x
*
* (substitute backwardK for forwardK if you want to get the
* y position for an entry in the "backward" array.
*/
EditPaths forward = new ForwardEditPaths();
EditPaths backward = new BackwardEditPaths();
/* Some variables which are shared between methods */
protected int beginA, endA, beginB, endB;
protected Edit edit;
abstract class EditPaths {
private IntList x = new IntList();
private LongList snake = new LongList();
int beginK, endK, middleK;
int prevBeginK, prevEndK;
/* if we hit one end early, no need to look further */
int minK, maxK; // TODO: better explanation
final int getIndex(int d, int k) {
// TODO: remove
if (((d + k - middleK) % 2) != 0)
throw new RuntimeException(MessageFormat.format(JGitText.get().unexpectedOddResult, Integer.valueOf(d), Integer.valueOf(k), Integer.valueOf(middleK)));
return (d + k - middleK) / 2;
}
final int getX(int d, int k) {
// TODO: remove
if (k < beginK || k > endK)
throw new RuntimeException(MessageFormat.format(JGitText.get().kNotInRange, Integer.valueOf(k), Integer.valueOf(beginK), Integer.valueOf(endK)));
return x.get(getIndex(d, k));
}
final long getSnake(int d, int k) {
// TODO: remove
if (k < beginK || k > endK)
throw new RuntimeException(MessageFormat.format(JGitText.get().kNotInRange, Integer.valueOf(k), Integer.valueOf(beginK), Integer.valueOf(endK)));
return snake.get(getIndex(d, k));
}
private int forceKIntoRange(int k) {
/* if k is odd, so must be the result */
if (k < minK)
return minK + ((k ^ minK) & 1);
else if (k > maxK)
return maxK - ((k ^ maxK) & 1);
return k;
}
void initialize(int k, int x, int minK, int maxK) {
this.minK = minK;
this.maxK = maxK;
beginK = endK = middleK = k;
this.x.clear();
this.x.add(x);
snake.clear();
snake.add(newSnake(k, x));
}
abstract int snake(int k, int x);
abstract int getLeft(int x);
abstract int getRight(int x);
abstract boolean isBetter(int left, int right);
abstract void adjustMinMaxK(int k, int x);
abstract boolean meets(int d, int k, int x, long snake);
final long newSnake(int k, int x) {
long y = (long) k + x;
long ret = ((long) x) << 32;
return ret | y;
}
final int snake2x(long snake) {
return (int) (snake >>> 32);
}
final int snake2y(long snake) {
return (int) snake;
}
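		// Packing example: newSnake(2, 5) records x == 5 in the upper 32 bits
		// and y == k + x == 7 in the lower 32 bits, so snake2x() == 5 and
		// snake2y() == 7.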
final boolean makeEdit(long snake1, long snake2) {
int x1 = snake2x(snake1), x2 = snake2x(snake2);
int y1 = snake2y(snake1), y2 = snake2y(snake2);
/*
* Check for incompatible partial edit paths:
* when there are ambiguities, we might have
* hit incompatible (i.e. non-overlapping)
* forward/backward paths.
*
* In that case, just pretend that we have
* an empty edit at the end of one snake; this
* will force a decision which path to take
* in the next recursion step.
*/
if (x1 > x2 || y1 > y2) {
x1 = x2;
y1 = y2;
}
edit = new Edit(x1, x2, y1, y2);
return true;
}
boolean calculate(int d) {
prevBeginK = beginK;
prevEndK = endK;
beginK = forceKIntoRange(middleK - d);
endK = forceKIntoRange(middleK + d);
// TODO: handle i more efficiently
// TODO: walk snake(k, getX(d, k)) only once per (d, k)
// TODO: move end points out of the loop to avoid conditionals inside the loop
// go backwards so that we can avoid temp vars
for (int k = endK; k >= beginK; k -= 2) {
if (Thread.interrupted()) {
throw new DiffInterruptedException();
}
int left = -1, right = -1;
long leftSnake = -1L, rightSnake = -1L;
// TODO: refactor into its own function
if (k > prevBeginK) {
int i = getIndex(d - 1, k - 1);
left = x.get(i);
int end = snake(k - 1, left);
leftSnake = left != end ?
newSnake(k - 1, end) :
snake.get(i);
if (meets(d, k - 1, end, leftSnake))
return true;
left = getLeft(end);
}
if (k < prevEndK) {
int i = getIndex(d - 1, k + 1);
right = x.get(i);
int end = snake(k + 1, right);
rightSnake = right != end ?
newSnake(k + 1, end) :
snake.get(i);
if (meets(d, k + 1, end, rightSnake))
return true;
right = getRight(end);
}
int newX;
long newSnake;
if (k >= prevEndK ||
(k > prevBeginK &&
isBetter(left, right))) {
newX = left;
newSnake = leftSnake;
}
else {
newX = right;
newSnake = rightSnake;
}
if (meets(d, k, newX, newSnake))
return true;
adjustMinMaxK(k, newX);
int i = getIndex(d, k);
x.set(i, newX);
snake.set(i, newSnake);
}
return false;
}
}
class ForwardEditPaths extends EditPaths {
@Override
final int snake(int k, int x) {
for (; x < endA && k + x < endB; x++)
if (!cmp.equals(a, x, b, k + x))
break;
return x;
}
@Override
final int getLeft(int x) {
return x;
}
@Override
final int getRight(int x) {
return x + 1;
}
@Override
final boolean isBetter(int left, int right) {
return left > right;
}
@Override
final void adjustMinMaxK(int k, int x) {
if (x >= endA || k + x >= endB) {
if (k > backward.middleK)
maxK = k;
else
minK = k;
}
}
@Override
final boolean meets(int d, int k, int x, long snake) {
if (k < backward.beginK || k > backward.endK)
return false;
// TODO: move out of loop
if (((d - 1 + k - backward.middleK) % 2) != 0)
return false;
if (x < backward.getX(d - 1, k))
return false;
makeEdit(snake, backward.getSnake(d - 1, k));
return true;
}
}
class BackwardEditPaths extends EditPaths {
@Override
final int snake(int k, int x) {
for (; x > beginA && k + x > beginB; x--)
if (!cmp.equals(a, x - 1, b, k + x - 1))
break;
return x;
}
@Override
final int getLeft(int x) {
return x - 1;
}
@Override
final int getRight(int x) {
return x;
}
@Override
final boolean isBetter(int left, int right) {
return left < right;
}
@Override
final void adjustMinMaxK(int k, int x) {
if (x <= beginA || k + x <= beginB) {
if (k > forward.middleK)
maxK = k;
else
minK = k;
}
}
@Override
final boolean meets(int d, int k, int x, long snake) {
if (k < forward.beginK || k > forward.endK)
return false;
// TODO: move out of loop
if (((d + k - forward.middleK) % 2) != 0)
return false;
if (x > forward.getX(d, k))
return false;
makeEdit(forward.getSnake(d, k), snake);
return true;
}
}
}
/**
* Main method
*
* @param args
* two filenames specifying the contents to be diffed
*/
public static void main(String[] args) {
if (args.length != 2) {
err().println(JGitText.get().need2Arguments);
System.exit(1);
}
try {
RawText a = new RawText(new java.io.File(args[0]));
RawText b = new RawText(new java.io.File(args[1]));
EditList r = INSTANCE.diff(RawTextComparator.DEFAULT, a, b);
System.out.println(r.toString());
} catch (Exception e) {
PrintWriter err = err();
err.println(e.getMessage());
e.printStackTrace(err);
}
}
private static PrintWriter err() {
return new PrintWriter(new OutputStreamWriter(System.err, UTF_8));
}
}
// File: PatchIdDiffFormatter.java
/*
 * Copyright (C) 2011, Stefan Lay and others
 */

/**
 * Get the line delimiter for the first line.
 *
 * @return the line delimiter or {@code null}
 */
public String getLineDelimiter() {
if (size() == 0) {
return null;
}
int e = getEnd(0);
if (content[e - 1] != '\n') {
return null;
}
if (content.length > 1 && e > 1 && content[e - 2] == '\r') {
return "\r\n"; //$NON-NLS-1$
}
return "\n"; //$NON-NLS-1$
}
/**
* Read a blob object into RawText, or throw BinaryBlobException if the blob
* is binary.
*
* @param ldr
* the ObjectLoader for the blob
* @param threshold
* if the blob is larger than this size, it is always assumed to
* be binary.
* @since 4.10
* @return the RawText representing the blob.
* @throws org.eclipse.jgit.errors.BinaryBlobException
* if the blob contains binary data.
* @throws java.io.IOException
* if the input could not be read.
*/
public static RawText load(ObjectLoader ldr, int threshold)
throws IOException, BinaryBlobException {
long sz = ldr.getSize();
if (sz > threshold) {
throw new BinaryBlobException();
}
int bufferSize = getBufferSize();
if (sz <= bufferSize) {
byte[] data = ldr.getCachedBytes(bufferSize);
if (isBinary(data, data.length, true)) {
throw new BinaryBlobException();
}
return new RawText(data);
}
byte[] head = new byte[bufferSize];
try (InputStream stream = ldr.openStream()) {
int off = 0;
int left = head.length;
byte last = 'x'; // Just something inconspicuous
while (left > 0) {
int n = stream.read(head, off, left);
if (n < 0) {
throw new EOFException();
}
left -= n;
while (n > 0) {
byte curr = head[off];
if (isBinary(curr, last)) {
throw new BinaryBlobException();
}
last = curr;
off++;
n--;
}
}
byte[] data;
try {
data = new byte[(int)sz];
} catch (OutOfMemoryError e) {
throw new LargeObjectException.OutOfMemory(e);
}
System.arraycopy(head, 0, data, 0, head.length);
IO.readFully(stream, data, off, (int) (sz-off));
return new RawText(data, RawParseUtils.lineMapOrBinary(data, 0, (int) sz));
}
}
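	// Two paths above: small blobs are materialized via getCachedBytes() and
	// checked with the byte-array isBinary(); larger blobs are streamed, with
	// the binary check applied byte-by-byte to the first buffer before the rest
	// of the stream is read into memory.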
}
// File: RawTextComparator.java
/*
* Copyright (C) 2009-2010, Google Inc.
 * Copyright (C) 2008-2009, Johannes E. Schindelin and others
 */

/**
 * Compute a hash code for the region {@code [ptr, end)} of raw.
 */
protected abstract int hashRegion(byte[] raw, int ptr, int end);
}
// File: RenameDetector.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import org.eclipse.jgit.api.errors.CanceledException;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
/**
* Detect and resolve object renames.
*/
public class RenameDetector {
private static final int EXACT_RENAME_SCORE = 100;
private static final Comparator> nonUniqueAdds = new ArrayList<>();
for (Object o : addedMap.values()) {
if (o instanceof DiffEntry)
uniqueAdds.add((DiffEntry) o);
else
				nonUniqueAdds.add((List<DiffEntry>) o);

/**
 * @param <S>
 *            type of sequence the comparator supports.
 */
public abstract class SequenceComparator<S extends Sequence> {
/**
* Compare two items to determine if they are equivalent.
*
* It is permissible to compare sequence {@code a} with itself (by passing
* {@code a} again in position {@code b}).
*
* @param a
* the first sequence.
* @param ai
	 *            item of {@code a} to compare.
* @param b
* the second sequence.
* @param bi
	 *            item of {@code b} to compare.
* @return true if the two items are identical according to this function's
* equivalence rule.
*/
public abstract boolean equals(S a, int ai, S b, int bi);
/**
* Get a hash value for an item in a sequence.
*
* If two items are equal according to this comparator's
* {@link #equals(Sequence, int, Sequence, int)} method, then this hash
* method must produce the same integer result for both items.
*
* It is not required for two items to have different hash values if they
* are unequal according to the {@code equals()} method.
*
* @param seq
* the sequence.
* @param ptr
* the item to obtain the hash for.
	 * @return the hash value.
*/
public abstract int hash(S seq, int ptr);
/**
* Modify the edit to remove common leading and trailing items.
*
* The supplied edit {@code e} is reduced in size by moving the beginning A
* and B points so the edit does not cover any items that are in common
* between the two sequences. The ending A and B points are also shifted to
* remove common items from the end of the region.
*
* @param a
* the first sequence.
* @param b
* the second sequence.
* @param e
* the edit to start with and update.
* @return {@code e} if it was updated in-place, otherwise a new edit
* containing the reduced region.
*/
public Edit reduceCommonStartEnd(S a, S b, Edit e) {
// Skip over items that are common at the start.
//
while (e.beginA < e.endA && e.beginB < e.endB
&& equals(a, e.beginA, b, e.beginB)) {
e.beginA++;
e.beginB++;
}
// Skip over items that are common at the end.
//
while (e.beginA < e.endA && e.beginB < e.endB
&& equals(a, e.endA - 1, b, e.endB - 1)) {
e.endA--;
e.endB--;
}
return e;
}
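	// Example: for sequences a = [x, y, z, w] and b = [x, q, z, w], reducing an
	// initial Edit(0, 4, 0, 4) trims the shared "x" prefix and the shared
	// "z, w" suffix, leaving Edit(1, 2, 1, 2), a single-element REPLACE.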
}
// File: SimilarityIndex.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectStream;
/**
 * Wraps a {@link org.eclipse.jgit.diff.Sequence} to have a narrower range of
 * elements.
 *
 * This sequence acts as a proxy for the real sequence, translating element
 * indexes on the fly by adding {@code begin} to them. Sequences of this type
 * must be used with a {@link org.eclipse.jgit.diff.SubsequenceComparator}.
 *
 * @param <S>
 *            the base sequence type.
 */
public final class Subsequence<S extends Sequence> extends Sequence {
/**
* Construct a subsequence around the A region/base sequence.
*
	 * @param <S>
* type of returned Sequence
* @param a
* the A sequence.
* @param region
* the region of {@code a} to create a subsequence around.
* @return subsequence of {@code base} as described by A in {@code region}.
*/
	public static <S extends Sequence> Subsequence<S> a(S a, Edit region) {
return new Subsequence<>(a, region.beginA, region.endA);
}
/**
* Construct a subsequence around the B region/base sequence.
*
	 * @param <S>
* type of returned Sequence
* @param b
* the B sequence.
* @param region
* the region of {@code b} to create a subsequence around.
* @return subsequence of {@code base} as described by B in {@code region}.
*/
	public static <S extends Sequence> Subsequence<S> b(S b, Edit region) {
return new Subsequence<>(b, region.beginB, region.endB);
}
/**
* Adjust the Edit to reflect positions in the base sequence.
*
	 * @param <S>
* type of returned Sequence
* @param e
* edit to adjust in-place. Prior to invocation the indexes are
* in terms of the two subsequences; after invocation the indexes
* are in terms of the base sequences.
* @param a
* the A sequence.
* @param b
* the B sequence.
*/
	public static <S extends Sequence> void toBase(Edit e, Subsequence<S> a,
			Subsequence<S> b) {
e.beginA += a.begin;
e.endA += a.begin;
e.beginB += b.begin;
e.endB += b.begin;
}
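	// Example: if subsequence a starts at base index 10 and subsequence b
	// starts at base index 30, an Edit(0, 2, 1, 3) computed against the
	// subsequences becomes Edit(10, 12, 31, 33) in terms of the base sequences.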
/**
* Adjust the Edits to reflect positions in the base sequence.
*
	 * @param <S>
* type of returned Sequence
* @param edits
* edits to adjust in-place. Prior to invocation the indexes are
* in terms of the two subsequences; after invocation the indexes
* are in terms of the base sequences.
* @param a
* the A sequence.
* @param b
* the B sequence.
* @return always {@code edits} (as the list was updated in-place).
*/
	public static <S extends Sequence> EditList toBase(EditList edits,
			Subsequence<S> a, Subsequence<S> b) {
for (Edit e : edits)
toBase(e, a, b);
return edits;
}
final S base;
final int begin;
private final int size;
/**
* Construct a subset of another sequence.
*
* The size of the subsequence will be {@code end - begin}.
*
* @param base
* the real sequence.
* @param begin
* First element index of {@code base} that will be part of this
* new subsequence. The element at {@code begin} will be this
* sequence's element 0.
* @param end
* One past the last element index of {@code base} that will be
* part of this new subsequence.
*/
public Subsequence(S base, int begin, int end) {
this.base = base;
this.begin = begin;
this.size = end - begin;
}
@Override
public int size() {
return size;
}
}
// File: SubsequenceComparator.java
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Wrap another comparator for use with
* {@link org.eclipse.jgit.diff.Subsequence}.
 *
 * This comparator acts as a proxy for the real comparator, translating element
 * indexes on the fly by adding the subsequence's begin offset to them.
 * Comparators of this type must be used with a
 * {@link org.eclipse.jgit.diff.Subsequence}.
 *
 * @param <S>
 *            the base sequence type.
 */
public final class SubsequenceComparator<S extends Sequence> extends
		SequenceComparator<Subsequence<S>> {
	private final SequenceComparator<? super S> cmp;
	@Override
	public boolean equals(Subsequence<S> a, int ai, Subsequence<S> b, int bi) {
return cmp.equals(a.base, ai + a.begin, b.base, bi + b.begin);
}
@Override
	public int hash(Subsequence<S> seq, int ptr) {
return cmp.hash(seq.base, ptr + seq.begin);
}
}
// File: package-info.java
/**
* Comparing file contents by computing diffs.
*/
package org.eclipse.jgit.diff;