aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-duplications
diff options
context:
space:
mode:
authorEvgeny Mandrikov <mandrikov@gmail.com>2011-12-15 14:02:49 +0400
committerEvgeny Mandrikov <mandrikov@gmail.com>2011-12-15 22:26:40 +0400
commitae5528042cd2f948cea3e50c32a5426eb302d3f7 (patch)
tree059fc62017442e0e7011b1143bda411bbc255e0d /sonar-duplications
parent16232e514c5b0d064a114906fb31441b97a480ae (diff)
downloadsonarqube-ae5528042cd2f948cea3e50c32a5426eb302d3f7.tar.gz
sonarqube-ae5528042cd2f948cea3e50c32a5426eb302d3f7.zip
SONAR-3060 Improve new CPD algorithm
* Remove recursive part of algorithm to avoid StackOverflowError * Fix violations
Diffstat (limited to 'sonar-duplications')
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java3
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java61
-rw-r--r--sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java4
3 files changed, 34 insertions, 34 deletions
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java
index f9e1fecdec9..5c317dbe0ce 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java
@@ -25,7 +25,6 @@ import java.util.List;
import org.sonar.duplications.detector.ContainsInComparator;
import org.sonar.duplications.index.CloneGroup;
import org.sonar.duplications.index.ClonePart;
-import org.sonar.duplications.utils.FastStringComparator;
import org.sonar.duplications.utils.SortedListsUtils;
import com.google.common.collect.Lists;
@@ -103,7 +102,7 @@ final class Filter {
* </p>
* <p>
* <strong>Important: this method relies on fact that all parts were already sorted by resourceId and unitStart by using
- * {@link BlocksGroup.BlockComparator}, which uses {@link FastStringComparator} for comparison by resourceId.</strong>
+ * {@link BlocksGroup.BlockComparator}, which uses {@link org.sonar.duplications.utils.FastStringComparator} for comparison by resourceId.</strong>
* </p>
* <p>
* Running time - O(|A|+|B|).
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java
index d0c32f74765..8e5b4e665d1 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java
@@ -21,14 +21,16 @@ package org.sonar.duplications.detector.suffixtree;
import java.util.*;
+import com.google.common.collect.Lists;
+
public final class Search {
private final SuffixTree tree;
private final TextSet text;
private final Collector reporter;
- private final List<Integer> list = new ArrayList<Integer>();
- private final List<Node> innerNodes = new ArrayList<Node>();
+ private final List<Integer> list = Lists.newArrayList();
+ private final List<Node> innerNodes = Lists.newArrayList();
public static void perform(TextSet text, Collector reporter) {
new Search(SuffixTree.create(text), text, reporter).compute();
@@ -42,52 +44,53 @@ public final class Search {
private void compute() {
// O(N)
- computeDepth();
+ dfs();
// O(N * log(N))
Collections.sort(innerNodes, DEPTH_COMPARATOR);
- // O(N), recursive
- createListOfLeafs(tree.getRootNode());
-
// O(N)
visitInnerNodes();
}
- private void computeDepth() {
- Queue<Node> queue = new LinkedList<Node>();
- queue.add(tree.getRootNode());
- tree.getRootNode().depth = 0;
- while (!queue.isEmpty()) {
- Node node = queue.remove();
- if (!node.getEdges().isEmpty()) {
+ private static final Comparator<Node> DEPTH_COMPARATOR = new Comparator<Node>() {
+ public int compare(Node o1, Node o2) {
+ return o2.depth - o1.depth;
+ }
+ };
+
+ /**
+ * Depth-first search (DFS).
+ */
+ private void dfs() {
+ LinkedList<Node> stack = Lists.newLinkedList();
+ stack.add(tree.getRootNode());
+ while (!stack.isEmpty()) {
+ Node node = stack.removeLast();
+ node.startSize = list.size();
+ if (node.getEdges().isEmpty()) { // leaf
+ list.add(node.depth);
+ node.endSize = list.size();
+ } else {
if (node != tree.getRootNode()) { // inner node = not leaf and not root
innerNodes.add(node);
}
for (Edge edge : node.getEdges()) {
Node endNode = edge.getEndNode();
endNode.depth = node.depth + edge.getSpan() + 1;
- queue.add(endNode);
+ stack.addLast(endNode);
}
}
}
- }
-
- private static final Comparator<Node> DEPTH_COMPARATOR = new Comparator<Node>() {
- public int compare(Node o1, Node o2) {
- return o2.depth - o1.depth;
- }
- };
-
- private void createListOfLeafs(Node node) {
- node.startSize = list.size();
- if (node.getEdges().isEmpty()) { // leaf
- list.add(node.depth);
- } else {
+ // At this point all inner nodes are ordered by the time of entering, so we visit them from last to first
+ ListIterator<Node> iterator = innerNodes.listIterator(innerNodes.size());
+ while (iterator.hasPrevious()) {
+ Node node = iterator.previous();
+ int max = -1;
for (Edge edge : node.getEdges()) {
- createListOfLeafs(edge.getEndNode());
+ max = Math.max(edge.getEndNode().endSize, max);
}
- node.endSize = list.size();
+ node.endSize = max;
}
}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java
index 7d80e899859..4be45100f30 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java
@@ -59,8 +59,6 @@ public class SuffixTreeCloneDetectionAlgorithmTest extends DetectorTestCase {
* However should be noted that current implementation with suffix-tree also is not optimal,
* even if it works for this example couple of seconds,
* because duplications should be filtered in order to remove fully-covered.
- * Moreover - height of the tree grows, depending on the number of blocks, which might lead to StackOverflowError,
- * because algorithm uses recursion.
* But such cases nearly never appear in real-world, so current implementation is acceptable for the moment.
* </p>
*/
@@ -69,7 +67,7 @@ public class SuffixTreeCloneDetectionAlgorithmTest extends DetectorTestCase {
CloneIndex index = createIndex();
List<Block> fileBlocks = Lists.newArrayList();
int indexInFile = 0;
- for (int i = 0; i < 2000; i++) {
+ for (int i = 0; i < 5000; i++) {
Block block = newBlock("x", new ByteArray("01"), indexInFile);
fileBlocks.add(block);
indexInFile++;