From b65a688104ad5ba6ef9db5033d173feaa4953f94 Mon Sep 17 00:00:00 2001
From: Evgeny Mandrikov
+ * Current CloneGroup can not include none of CloneGroup, which were constructed before.
+ * Proof:
+ * According to an order of visiting nodes in suffix tree - length of earlier >= length of current.
+ * If length of earlier > length of current, then earlier not contained in current.
+ * If length of earlier = length of current, then earlier can be contained in current only
+ * when current has exactly the same and maybe some additional CloneParts as earlier,
+ * what in his turn will mean that two inner-nodes on same depth will satisfy condition
+ * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth.
+ *
- * Clone A is contained in another clone B, if every part pA from A has part pB in B,
+ * CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B,
* which satisfy the conditions:
*
* (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
@@ -145,7 +141,7 @@ public class DuplicationsCollector implements Search.Collector {
*
* pB.resourceId == pA.resourceId
*
- * So this relation is:
+ * Inclusion is the partial order, thus this relation is:
*
*
*
- * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link #CLONEPART_COMPARATOR}), + * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}), * so running time - O(|A|+|B|). *
*/ private static boolean containsIn(CloneGroup first, CloneGroup second) { - // TODO Godin: must prove that this is unused - // if (!first.getOriginPart().getResourceId().equals(second.getOriginPart().getResourceId())) { - // return false; - // } - // if (first.getCloneUnitLength() > second.getCloneUnitLength()) { - // return false; - // } List+ * Length - is a depth of node. And nodes are visited in descending order of depth, + * thus we guaranty that length will not increase between two sequential calls of this method + * (can be equal or less than previous value). + *
+ * + * @param size number of parts in group + * @param length length of each part in group + */ + abstract void startOfGroup(int size, int length); + + /** + * Invoked as many times as leafs in the subtree, where current node is root. + * * @param start start position in generalised text * @param end end position in generalised text */ - void part(int start, int end, int len); + abstract void part(int start, int end); - void endOfGroup(); + /** + * Invoked at the end of processing for current node. + */ + abstract void endOfGroup(); } diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTree.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTree.java index b6e21c940c7..e79137dc10a 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTree.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTree.java @@ -22,9 +22,24 @@ package org.sonar.duplications.detector.suffixtree; import com.google.common.base.Objects; /** - * The implementation of the algorithm for constructing suffix-tree based on - * Java-port of + * Provides algorithm to construct suffix tree. + *+ * Suffix tree for the string S of length n is defined as a tree such that: + *
+ * This implementation was adapted from Java-port of * Mark Nelson's C++ implementation of Ukkonen's algorithm. + *
*/ public final class SuffixTree { diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java index 4daccb1776b..033dc879a44 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java @@ -28,28 +28,28 @@ import com.google.common.collect.Lists; /** * Simplifies construction of generalised suffix-tree. */ -public class TextSet extends AbstractText { +public final class TextSet extends AbstractText { - public static class Builder { + public static final class Builder { private List