about summary refs log tree commit diff stats
path: root/org.eclipse.jgit
diff options
context:
space:
mode:
author: Marc Strapetz <marc.strapetz@syntevo.com> 2014-10-30 19:36:49 +0100
committer: Matthias Sohn <matthias.sohn@sap.com> 2015-01-02 23:23:22 +0100
commit: 1cb566844116fc280969e4a89be8fa1c6e3a7d72 (patch)
tree: e2b523768e18d767a958b86d4bfb3ab69872978a /org.eclipse.jgit
parent: c053900c5bc89d2b55c563e04bc1edab53cdc143 (diff)
download: jgit-1cb566844116fc280969e4a89be8fa1c6e3a7d72.tar.gz
          jgit-1cb566844116fc280969e4a89be8fa1c6e3a7d72.zip
Rename detection should canonicalize line endings
Native Git canonicalizes line endings when detecting renames; more
specifically, it replaces CRLF with LF. See: hash_chars in diffcore-delta.c

Bug: 449545
Change-Id: Iec2aab12ae9e67074cccb7fbd4d9defe176a0130
Signed-off-by: Marc Strapetz <marc.strapetz@syntevo.com>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
Diffstat (limited to 'org.eclipse.jgit')
-rw-r--r-- org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java 64
1 files changed, 42 insertions, 22 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
index 17ccb9726f..f376b8e36e 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
@@ -79,8 +79,11 @@ class SimilarityIndex {
/** Maximum value of the count field, also mask to extract the count. */
private static final long MAX_COUNT = (1L << KEY_SHIFT) - 1;
- /** Total size of the file we hashed into the structure. */
- private long fileSize;
+ /**
+ * Total amount of bytes hashed into the structure, including \n. This is
+ * usually the size of the file minus number of CRLF encounters.
+ */
+ private long hashedCnt;
/** Number of non-zero entries in {@link #idHash}. */
private int idSize;
@@ -108,48 +111,59 @@ class SimilarityIndex {
idGrowAt = growAt(idHashBits);
}
- long getFileSize() {
- return fileSize;
- }
-
- void setFileSize(long size) {
- fileSize = size;
- }
-
void hash(ObjectLoader obj) throws MissingObjectException, IOException,
TableFullException {
if (obj.isLarge()) {
- ObjectStream in = obj.openStream();
- try {
- setFileSize(in.getSize());
- hash(in, fileSize);
- } finally {
- in.close();
- }
+ hashLargeObject(obj);
} else {
byte[] raw = obj.getCachedBytes();
- setFileSize(raw.length);
hash(raw, 0, raw.length);
}
}
+ private void hashLargeObject(ObjectLoader obj) throws IOException,
+ TableFullException {
+ ObjectStream in1 = obj.openStream();
+ boolean text;
+ try {
+ text = !RawText.isBinary(in1);
+ } finally {
+ in1.close();
+ }
+
+ ObjectStream in2 = obj.openStream();
+ try {
+ hash(in2, in2.getSize(), text);
+ } finally {
+ in2.close();
+ }
+ }
+
void hash(byte[] raw, int ptr, final int end) throws TableFullException {
+ final boolean text = !RawText.isBinary(raw);
+ hashedCnt = 0;
while (ptr < end) {
int hash = 5381;
+ int blockHashedCnt = 0;
int start = ptr;
// Hash one line, or one block, whichever occurs first.
do {
int c = raw[ptr++] & 0xff;
+ // Ignore CR in CRLF sequence if text
+ if (text && c == '\r' && ptr < end && raw[ptr] == '\n')
+ continue;
+ blockHashedCnt++;
if (c == '\n')
break;
hash = (hash << 5) + hash + c;
} while (ptr < end && ptr - start < 64);
- add(hash, ptr - start);
+ hashedCnt += blockHashedCnt;
+ add(hash, blockHashedCnt);
}
}
- void hash(InputStream in, long remaining) throws IOException,
+ void hash(InputStream in, long remaining, boolean text) throws IOException,
TableFullException {
byte[] buf = new byte[4096];
int ptr = 0;
@@ -157,6 +171,7 @@ class SimilarityIndex {
while (0 < remaining) {
int hash = 5381;
+ int blockHashedCnt = 0;
// Hash one line, or one block, whichever occurs first.
int n = 0;
@@ -170,11 +185,16 @@ class SimilarityIndex {
n++;
int c = buf[ptr++] & 0xff;
+ // Ignore CR in CRLF sequence if text
+ if (text && c == '\r' && ptr < cnt && buf[ptr] == '\n')
+ continue;
+ blockHashedCnt++;
if (c == '\n')
break;
hash = (hash << 5) + hash + c;
} while (n < 64 && n < remaining);
- add(hash, n);
+ hashedCnt += blockHashedCnt;
+ add(hash, blockHashedCnt);
remaining -= n;
}
}
@@ -193,7 +213,7 @@ class SimilarityIndex {
}
int score(SimilarityIndex dst, int maxScore) {
- long max = Math.max(fileSize, dst.fileSize);
+ long max = Math.max(hashedCnt, dst.hashedCnt);
if (max == 0)
return maxScore;
return (int) ((common(dst) * maxScore) / max);