aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-duplications
diff options
context:
space:
mode:
Diffstat (limited to 'sonar-duplications')
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java4
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java10
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java77
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java2
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java67
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java61
-rw-r--r--sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java46
7 files changed, 142 insertions, 125 deletions
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java
index 5a3ffa0c14c..b0d57551bf8 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java
@@ -30,6 +30,10 @@ public abstract class AbstractText implements Text {
this.symbols = new ArrayList<Object>(size);
}
+ public AbstractText(List<Object> symbols) {
+ this.symbols = symbols;
+ }
+
public int length() {
return symbols.size();
}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java
index 745fb90a3b3..c6965648bb3 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java
@@ -37,7 +37,7 @@ import com.google.common.collect.Lists;
*/
public class DuplicationsCollector implements Search.Collector {
- private final GeneralisedHashText text;
+ private final TextSet text;
private final String originResourceId;
/**
@@ -51,7 +51,7 @@ public class DuplicationsCollector implements Search.Collector {
private ClonePart origin;
private final List<ClonePart> parts = Lists.newArrayList();
- public DuplicationsCollector(GeneralisedHashText text) {
+ public DuplicationsCollector(TextSet text) {
this.text = text;
this.originResourceId = text.getBlock(0).getResourceId();
}
@@ -162,9 +162,9 @@ public class DuplicationsCollector implements Search.Collector {
// if (!first.getOriginPart().getResourceId().equals(second.getOriginPart().getResourceId())) {
// return false;
// }
- if (first.getCloneUnitLength() > second.getCloneUnitLength()) {
- return false;
- }
+ // if (first.getCloneUnitLength() > second.getCloneUnitLength()) {
+ // return false;
+ // }
List<ClonePart> firstParts = first.getCloneParts();
List<ClonePart> secondParts = second.getCloneParts();
return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(first.getCloneUnitLength(), second.getCloneUnitLength()))
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java
deleted file mode 100644
index 202cb85084c..00000000000
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Sonar, open source software quality management tool.
- * Copyright (C) 2008-2011 SonarSource
- * mailto:contact AT sonarsource DOT com
- *
- * Sonar is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 3 of the License, or (at your option) any later version.
- *
- * Sonar is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Sonar; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
- */
-package org.sonar.duplications.detector.suffixtree;
-
-import java.util.List;
-
-import org.sonar.duplications.block.Block;
-
-import com.google.common.collect.Lists;
-
-public class GeneralisedHashText extends TextSet {
-
- public GeneralisedHashText(List<Block>... blocks) {
- super(blocks.length);
-
- for (int i = 0; i < blocks.length; i++) {
- addAll(blocks[i]);
- addTerminator();
- }
- finish();
- }
-
- private int count;
- private List<Integer> sizes = Lists.newArrayList();
-
- public void addBlock(Block block) {
- symbols.add(block);
- }
-
- public void addAll(List<Block> list) {
- symbols.addAll(list);
- }
-
- public void addTerminator() {
- symbols.add(new Terminator(count));
- sizes.add(symbols.size());
- count++;
- }
-
- public void finish() {
- super.lens = new int[sizes.size()];
- for (int i = 0; i < sizes.size(); i++) {
- super.lens[i] = sizes.get(i);
- }
- }
-
- @Override
- public Object symbolAt(int index) {
- Object obj = super.symbolAt(index);
- if (obj instanceof Block) {
- return ((Block) obj).getBlockHash();
- }
- return obj;
- }
-
- public Block getBlock(int index) {
- return (Block) super.symbolAt(index);
- }
-
-}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java
index f13410e90d4..80d714339d4 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java
@@ -31,7 +31,7 @@ public final class Search {
private final List<Node> innerNodes = new ArrayList<Node>();
public static void perform(TextSet text, Collector reporter) {
- new Search(SuffixTree.create(text), text.lens, reporter).compute();
+ new Search(SuffixTree.create(text), text.getLens(), reporter).compute();
}
private Search(SuffixTree tree, int[] lens, Collector reporter) {
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java
index 004074cfa48..0952476569d 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java
@@ -25,9 +25,8 @@ import org.sonar.duplications.block.Block;
import org.sonar.duplications.block.ByteArray;
import org.sonar.duplications.index.CloneGroup;
import org.sonar.duplications.index.CloneIndex;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
+
+import com.google.common.collect.*;
public final class SuffixTreeCloneDetectionAlgorithm {
@@ -35,7 +34,7 @@ public final class SuffixTreeCloneDetectionAlgorithm {
if (fileBlocks.isEmpty()) {
return Collections.EMPTY_LIST;
}
- GeneralisedHashText text = retrieveFromIndex(cloneIndex, fileBlocks);
+ TextSet text = createTextSet(cloneIndex, fileBlocks);
if (text == null) {
return Collections.EMPTY_LIST;
}
@@ -47,43 +46,30 @@ public final class SuffixTreeCloneDetectionAlgorithm {
private SuffixTreeCloneDetectionAlgorithm() {
}
- private static GeneralisedHashText retrieveFromIndex(CloneIndex index, Collection<Block> fileBlocks) {
- String originResourceId = fileBlocks.iterator().next().getResourceId();
-
+ private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) {
Set<ByteArray> hashes = Sets.newHashSet();
for (Block fileBlock : fileBlocks) {
hashes.add(fileBlock.getBlockHash());
}
- Map<String, List<Block>> collection = Maps.newHashMap();
- for (ByteArray hash : hashes) {
- Collection<Block> blocks = index.getBySequenceHash(hash);
- for (Block blockFromIndex : blocks) {
- // Godin: skip blocks for this file if they come from index
- String resourceId = blockFromIndex.getResourceId();
- if (!originResourceId.equals(resourceId)) {
- List<Block> list = collection.get(resourceId);
- if (list == null) {
- list = Lists.newArrayList();
- collection.put(resourceId, list);
- }
- list.add(blockFromIndex);
- }
- }
- }
+ String originResourceId = fileBlocks.iterator().next().getResourceId();
+ Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes);
- if (collection.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications
+ if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications
return null;
}
- GeneralisedHashText text = new GeneralisedHashText();
+ return createTextSet(fileBlocks, fromIndex);
+ }
+
+ private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) {
+ TextSet.Builder textSetBuilder = TextSet.builder();
// TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks
List<Block> sortedFileBlocks = Lists.newArrayList(fileBlocks);
Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR);
- text.addAll(sortedFileBlocks);
- text.addTerminator();
+ textSetBuilder.add(sortedFileBlocks);
- for (List<Block> list : collection.values()) {
+ for (List<Block> list : fromIndex.values()) {
Collections.sort(list, BLOCK_COMPARATOR);
int i = 0;
@@ -92,15 +78,32 @@ public final class SuffixTreeCloneDetectionAlgorithm {
while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) {
j++;
}
- text.addAll(list.subList(i, j));
- text.addTerminator();
+ textSetBuilder.add(list.subList(i, j));
i = j;
}
}
- text.finish();
+ return textSetBuilder.build();
+ }
- return text;
+ private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) {
+ Map<String, List<Block>> collection = Maps.newHashMap();
+ for (ByteArray hash : hashes) {
+ Collection<Block> blocks = index.getBySequenceHash(hash);
+ for (Block blockFromIndex : blocks) {
+ // Godin: skip blocks for this file if they come from index
+ String resourceId = blockFromIndex.getResourceId();
+ if (!originResourceId.equals(resourceId)) {
+ List<Block> list = collection.get(resourceId);
+ if (list == null) {
+ list = Lists.newArrayList();
+ collection.put(resourceId, list);
+ }
+ list.add(blockFromIndex);
+ }
+ }
+ }
+ return collection;
}
private static final Comparator<Block> BLOCK_COMPARATOR = new Comparator<Block>() {
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java
index aa3628ed048..4daccb1776b 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java
@@ -19,26 +19,67 @@
*/
package org.sonar.duplications.detector.suffixtree;
+import java.util.List;
+
+import org.sonar.duplications.block.Block;
+
+import com.google.common.collect.Lists;
+
/**
* Simplifies construction of <a href="http://en.wikipedia.org/wiki/Generalised_suffix_tree">generalised suffix-tree</a>.
*/
public class TextSet extends AbstractText {
- int[] lens;
+ public static class Builder {
+
+ private List<Object> symbols = Lists.newArrayList();
+ private List<Integer> sizes = Lists.newArrayList();
+
+ private Builder() {
+ }
+
+ public void add(List<Block> list) {
+ symbols.addAll(list);
+ symbols.add(new Terminator(sizes.size()));
+ sizes.add(symbols.size());
+ }
+
+ public TextSet build() {
+ int[] lens = new int[sizes.size()];
+ for (int i = 0; i < sizes.size(); i++) {
+ lens[i] = sizes.get(i);
+ }
+ return new TextSet(symbols, lens);
+ }
+
+ }
+
+ public static Builder builder() {
+ return new Builder();
+ }
- public TextSet(int size) {
- super(100); // FIXME
+ private int[] lens;
- lens = new int[size];
+ private TextSet(List<Object> symbols, int[] lens) {
+ super(symbols);
+ this.lens = lens;
}
- public TextSet(Text... text) {
- this(text.length);
- for (int i = 0; i < text.length; i++) {
- symbols.addAll(text[i].sequence(0, text[i].length()));
- symbols.add(new Terminator(i));
- lens[i] = symbols.size();
+ public int[] getLens() {
+ return lens;
+ }
+
+ @Override
+ public Object symbolAt(int index) {
+ Object obj = super.symbolAt(index);
+ if (obj instanceof Block) {
+ return ((Block) obj).getBlockHash();
}
+ return obj;
+ }
+
+ public Block getBlock(int index) {
+ return (Block) super.symbolAt(index);
}
public static class Terminator {
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java
index 572811ef3df..d094196e528 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java
@@ -112,6 +112,52 @@ public class SuffixTreeCloneDetectionAlgorithmTest extends DetectorTestCase {
newClonePart("x", 14, 2)));
}
+ /**
+ * Given:
+ * <pre>
+ * a: 1 2 3 4
+ * b: 4 3 2
+ * c: 4 3 1
+ * </pre>
+ * Expected:
+ * <pre>
+ * a-c (1)
+ * a-b (2)
+ * a-b-c (3)
+ * a-b-c (4)
+ * <pre>
+ */
+ @Test
+ public void myTest3() {
+ CloneIndex index = createIndex(
+ newBlocks("b", "4 3 2"),
+ newBlocks("c", "4 3 1")
+ );
+ List<Block> fileBlocks = newBlocks("a", "1 2 3 4");
+ List<CloneGroup> result = detect(index, fileBlocks);
+
+ print(result);
+ assertEquals(4, result.size());
+
+ assertThat(result, hasCloneGroup(1,
+ newClonePart("a", 0, 1),
+ newClonePart("c", 2, 1)));
+
+ assertThat(result, hasCloneGroup(1,
+ newClonePart("a", 1, 1),
+ newClonePart("b", 2, 1)));
+
+ assertThat(result, hasCloneGroup(1,
+ newClonePart("a", 2, 1),
+ newClonePart("b", 1, 1),
+ newClonePart("c", 1, 1)));
+
+ assertThat(result, hasCloneGroup(1,
+ newClonePart("a", 3, 1),
+ newClonePart("b", 0, 1),
+ newClonePart("c", 0, 1)));
+ }
+
@Override
protected List<CloneGroup> detect(CloneIndex index, List<Block> fileBlocks) {
return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks);