SONAR-3060 Improve new CPD algorithm

author Evgeny Mandrikov <mandrikov@gmail.com>

Thu, 15 Dec 2011 10:02:49 +0000 (14:02 +0400)

committer Evgeny Mandrikov <mandrikov@gmail.com>

Thu, 15 Dec 2011 18:26:40 +0000 (22:26 +0400)
author Evgeny Mandrikov <mandrikov@gmail.com>
Thu, 15 Dec 2011 10:02:49 +0000 (14:02 +0400)
committer Evgeny Mandrikov <mandrikov@gmail.com>
Thu, 15 Dec 2011 18:26:40 +0000 (22:26 +0400)
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java

index f9e1fecdec91b78449f03e6b076ace992fb6d0b7..5c317dbe0ce9f41cc421ea68fa95850c44238fac 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java
@@ -25,7 +25,6 @@ import java.util.List;
  import org.sonar.duplications.detector.ContainsInComparator;
  import org.sonar.duplications.index.CloneGroup;
  import org.sonar.duplications.index.ClonePart;
-import org.sonar.duplications.utils.FastStringComparator;
  import org.sonar.duplications.utils.SortedListsUtils;
  
  import com.google.common.collect.Lists;
@@ -103,7 +102,7 @@ final class Filter {
     * </p>
     * <p>
     * <strong>Important: this method relies on fact that all parts were already sorted by resourceId and unitStart by using
-   * {@link BlocksGroup.BlockComparator}, which uses {@link FastStringComparator} for comparison by resourceId.</strong>
+   * {@link BlocksGroup.BlockComparator}, which uses {@link org.sonar.duplications.utils.FastStringComparator} for comparison by resourceId.</strong>
     * </p>
     * <p>
     * Running time - O(|A|+|B|).
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java

index d0c32f747658aac6d59aaa0600fea74e546e00e2..8e5b4e665d1ba1e2e2cd7c130d9b3e5e745fe8bb 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java
@@ -21,14 +21,16 @@ package org.sonar.duplications.detector.suffixtree;
  
  import java.util.*;
  
+import com.google.common.collect.Lists;
+
  public final class Search {
  
    private final SuffixTree tree;
    private final TextSet text;
    private final Collector reporter;
  
-  private final List<Integer> list = new ArrayList<Integer>();
-  private final List<Node> innerNodes = new ArrayList<Node>();
+  private final List<Integer> list = Lists.newArrayList();
+  private final List<Node> innerNodes = Lists.newArrayList();
  
    public static void perform(TextSet text, Collector reporter) {
      new Search(SuffixTree.create(text), text, reporter).compute();
@@ -42,52 +44,53 @@ public final class Search {
  
    private void compute() {
      // O(N)
-    computeDepth();
+    dfs();
  
      // O(N * log(N))
      Collections.sort(innerNodes, DEPTH_COMPARATOR);
  
-    // O(N), recursive
-    createListOfLeafs(tree.getRootNode());
-
      // O(N)
      visitInnerNodes();
    }
  
-  private void computeDepth() {
-    Queue<Node> queue = new LinkedList<Node>();
-    queue.add(tree.getRootNode());
-    tree.getRootNode().depth = 0;
-    while (!queue.isEmpty()) {
-      Node node = queue.remove();
-      if (!node.getEdges().isEmpty()) {
+  private static final Comparator<Node> DEPTH_COMPARATOR = new Comparator<Node>() {
+    public int compare(Node o1, Node o2) {
+      return o2.depth - o1.depth;
+    }
+  };
+
+  /**
+   * Depth-first search (DFS).
+   */
+  private void dfs() {
+    LinkedList<Node> stack = Lists.newLinkedList();
+    stack.add(tree.getRootNode());
+    while (!stack.isEmpty()) {
+      Node node = stack.removeLast();
+      node.startSize = list.size();
+      if (node.getEdges().isEmpty()) { // leaf
+        list.add(node.depth);
+        node.endSize = list.size();
+      } else {
          if (node != tree.getRootNode()) { // inner node = not leaf and not root
            innerNodes.add(node);
          }
          for (Edge edge : node.getEdges()) {
            Node endNode = edge.getEndNode();
            endNode.depth = node.depth + edge.getSpan() + 1;
-          queue.add(endNode);
+          stack.addLast(endNode);
          }
        }
      }
-  }
-
-  private static final Comparator<Node> DEPTH_COMPARATOR = new Comparator<Node>() {
-    public int compare(Node o1, Node o2) {
-      return o2.depth - o1.depth;
-    }
-  };
-
-  private void createListOfLeafs(Node node) {
-    node.startSize = list.size();
-    if (node.getEdges().isEmpty()) { // leaf
-      list.add(node.depth);
-    } else {
+    // At this point all inner nodes are ordered by the time of entering, so we visit them from last to first
+    ListIterator<Node> iterator = innerNodes.listIterator(innerNodes.size());
+    while (iterator.hasPrevious()) {
+      Node node = iterator.previous();
+      int max = -1;
        for (Edge edge : node.getEdges()) {
-        createListOfLeafs(edge.getEndNode());
+        max = Math.max(edge.getEndNode().endSize, max);
        }
-      node.endSize = list.size();
+      node.endSize = max;
      }
    }
  
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java

index 7d80e899859126794b977fdfeb69785090aa095b..4be45100f30107e37c0331428a689faff67d0e7d 100644 (file)
--- a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java
@@ -59,8 +59,6 @@ public class SuffixTreeCloneDetectionAlgorithmTest extends DetectorTestCase {
     * However should be noted that current implementation with suffix-tree also is not optimal,
     * even if it works for this example couple of seconds,
     * because duplications should be filtered in order to remove fully-covered.
-   * Moreover - height of the tree grows, depending on the number of blocks, which might lead to StackOverflowError,
-   * because algorithm uses recursion.
     * But such cases nearly never appear in real-world, so current implementation is acceptable for the moment.
     * </p>
     */
@@ -69,7 +67,7 @@ public class SuffixTreeCloneDetectionAlgorithmTest extends DetectorTestCase {
      CloneIndex index = createIndex();
      List<Block> fileBlocks = Lists.newArrayList();
      int indexInFile = 0;
-    for (int i = 0; i < 2000; i++) {
+    for (int i = 0; i < 5000; i++) {
        Block block = newBlock("x", new ByteArray("01"), indexInFile);
        fileBlocks.add(block);
        indexInFile++;
author	Evgeny Mandrikov <mandrikov@gmail.com>
	Thu, 15 Dec 2011 10:02:49 +0000 (14:02 +0400)
committer	Evgeny Mandrikov <mandrikov@gmail.com>
	Thu, 15 Dec 2011 18:26:40 +0000 (22:26 +0400)
sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java		patch \| blob \| history
sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java		patch \| blob \| history
sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java		patch \| blob \| history