From b34960b90f6376697d4d5aaf79d6050b2874848f Mon Sep 17 00:00:00 2001 From: Evgeny Mandrikov Date: Wed, 7 Dec 2011 16:08:53 +0400 Subject: [PATCH] Fix violations, increase coverage --- .../detector/suffixtree/AbstractText.java | 4 + .../suffixtree/DuplicationsCollector.java | 10 +-- .../suffixtree/GeneralisedHashText.java | 77 ------------------- .../detector/suffixtree/Search.java | 2 +- .../SuffixTreeCloneDetectionAlgorithm.java | 67 ++++++++-------- .../detector/suffixtree/TextSet.java | 61 ++++++++++++--- ...SuffixTreeCloneDetectionAlgorithmTest.java | 46 +++++++++++ 7 files changed, 142 insertions(+), 125 deletions(-) delete mode 100644 sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java index 5a3ffa0c14c..b0d57551bf8 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java @@ -30,6 +30,10 @@ public abstract class AbstractText implements Text { this.symbols = new ArrayList(size); } + public AbstractText(List symbols) { + this.symbols = symbols; + } + public int length() { return symbols.size(); } diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java index 745fb90a3b3..c6965648bb3 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java @@ -37,7 +37,7 @@ import com.google.common.collect.Lists; */ public class DuplicationsCollector implements Search.Collector { - private final GeneralisedHashText text; + private final TextSet text; private final String originResourceId; /** @@ -51,7 +51,7 @@ public class DuplicationsCollector implements Search.Collector { private ClonePart origin; private final List parts = Lists.newArrayList(); - public DuplicationsCollector(GeneralisedHashText text) { + public DuplicationsCollector(TextSet text) { this.text = text; this.originResourceId = text.getBlock(0).getResourceId(); } @@ -162,9 +162,9 @@ public class DuplicationsCollector implements Search.Collector { // if (!first.getOriginPart().getResourceId().equals(second.getOriginPart().getResourceId())) { // return false; // } - if (first.getCloneUnitLength() > second.getCloneUnitLength()) { - return false; - } + // if (first.getCloneUnitLength() > second.getCloneUnitLength()) { + // return false; + // } List firstParts = first.getCloneParts(); List secondParts = second.getCloneParts(); return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(first.getCloneUnitLength(), second.getCloneUnitLength())) diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java deleted file mode 100644 index 202cb85084c..00000000000 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Sonar, open source software quality management tool. - * Copyright (C) 2008-2011 SonarSource - * mailto:contact AT sonarsource DOT com - * - * Sonar is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * Sonar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Sonar; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 - */ -package org.sonar.duplications.detector.suffixtree; - -import java.util.List; - -import org.sonar.duplications.block.Block; - -import com.google.common.collect.Lists; - -public class GeneralisedHashText extends TextSet { - - public GeneralisedHashText(List... blocks) { - super(blocks.length); - - for (int i = 0; i < blocks.length; i++) { - addAll(blocks[i]); - addTerminator(); - } - finish(); - } - - private int count; - private List sizes = Lists.newArrayList(); - - public void addBlock(Block block) { - symbols.add(block); - } - - public void addAll(List list) { - symbols.addAll(list); - } - - public void addTerminator() { - symbols.add(new Terminator(count)); - sizes.add(symbols.size()); - count++; - } - - public void finish() { - super.lens = new int[sizes.size()]; - for (int i = 0; i < sizes.size(); i++) { - super.lens[i] = sizes.get(i); - } - } - - @Override - public Object symbolAt(int index) { - Object obj = super.symbolAt(index); - if (obj instanceof Block) { - return ((Block) obj).getBlockHash(); - } - return obj; - } - - public Block getBlock(int index) { - return (Block) super.symbolAt(index); - } - -} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java index f13410e90d4..80d714339d4 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java @@ -31,7 +31,7 @@ public final class Search { private final List innerNodes = new ArrayList(); public static void perform(TextSet text, Collector reporter) { - new Search(SuffixTree.create(text), text.lens, reporter).compute(); + new Search(SuffixTree.create(text), text.getLens(), reporter).compute(); } private Search(SuffixTree tree, int[] lens, Collector reporter) { diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java index 004074cfa48..0952476569d 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java @@ -25,9 +25,8 @@ import org.sonar.duplications.block.Block; import org.sonar.duplications.block.ByteArray; import org.sonar.duplications.index.CloneGroup; import org.sonar.duplications.index.CloneIndex; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; + +import com.google.common.collect.*; public final class SuffixTreeCloneDetectionAlgorithm { @@ -35,7 +34,7 @@ public final class SuffixTreeCloneDetectionAlgorithm { if (fileBlocks.isEmpty()) { return Collections.EMPTY_LIST; } - GeneralisedHashText text = retrieveFromIndex(cloneIndex, fileBlocks); + TextSet text = createTextSet(cloneIndex, fileBlocks); if (text == null) { return Collections.EMPTY_LIST; } @@ -47,43 +46,30 @@ public final class SuffixTreeCloneDetectionAlgorithm { private SuffixTreeCloneDetectionAlgorithm() { } - private static GeneralisedHashText retrieveFromIndex(CloneIndex index, Collection fileBlocks) { - String originResourceId = fileBlocks.iterator().next().getResourceId(); - + private static TextSet createTextSet(CloneIndex index, Collection fileBlocks) { Set hashes = Sets.newHashSet(); for (Block fileBlock : fileBlocks) { hashes.add(fileBlock.getBlockHash()); } - Map> collection = Maps.newHashMap(); - for (ByteArray hash : hashes) { - Collection blocks = index.getBySequenceHash(hash); - for (Block blockFromIndex : blocks) { - // Godin: skip blocks for this file if they come from index - String resourceId = blockFromIndex.getResourceId(); - if (!originResourceId.equals(resourceId)) { - List list = collection.get(resourceId); - if (list == null) { - list = Lists.newArrayList(); - collection.put(resourceId, list); - } - list.add(blockFromIndex); - } - } - } + String originResourceId = fileBlocks.iterator().next().getResourceId(); + Map> fromIndex = retrieveFromIndex(index, originResourceId, hashes); - if (collection.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications + if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications return null; } - GeneralisedHashText text = new GeneralisedHashText(); + return createTextSet(fileBlocks, fromIndex); + } + + private static TextSet createTextSet(Collection fileBlocks, Map> fromIndex) { + TextSet.Builder textSetBuilder = TextSet.builder(); // TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks List sortedFileBlocks = Lists.newArrayList(fileBlocks); Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR); - text.addAll(sortedFileBlocks); - text.addTerminator(); + textSetBuilder.add(sortedFileBlocks); - for (List list : collection.values()) { + for (List list : fromIndex.values()) { Collections.sort(list, BLOCK_COMPARATOR); int i = 0; @@ -92,15 +78,32 @@ public final class SuffixTreeCloneDetectionAlgorithm { while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) { j++; } - text.addAll(list.subList(i, j)); - text.addTerminator(); + textSetBuilder.add(list.subList(i, j)); i = j; } } - text.finish(); + return textSetBuilder.build(); + } - return text; + private static Map> retrieveFromIndex(CloneIndex index, String originResourceId, Set hashes) { + Map> collection = Maps.newHashMap(); + for (ByteArray hash : hashes) { + Collection blocks = index.getBySequenceHash(hash); + for (Block blockFromIndex : blocks) { + // Godin: skip blocks for this file if they come from index + String resourceId = blockFromIndex.getResourceId(); + if (!originResourceId.equals(resourceId)) { + List list = collection.get(resourceId); + if (list == null) { + list = Lists.newArrayList(); + collection.put(resourceId, list); + } + list.add(blockFromIndex); + } + } + } + return collection; } private static final Comparator BLOCK_COMPARATOR = new Comparator() { diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java index aa3628ed048..4daccb1776b 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java @@ -19,26 +19,67 @@ */ package org.sonar.duplications.detector.suffixtree; +import java.util.List; + +import org.sonar.duplications.block.Block; + +import com.google.common.collect.Lists; + /** * Simplifies construction of generalised suffix-tree. */ public class TextSet extends AbstractText { - int[] lens; + public static class Builder { + + private List symbols = Lists.newArrayList(); + private List sizes = Lists.newArrayList(); + + private Builder() { + } + + public void add(List list) { + symbols.addAll(list); + symbols.add(new Terminator(sizes.size())); + sizes.add(symbols.size()); + } + + public TextSet build() { + int[] lens = new int[sizes.size()]; + for (int i = 0; i < sizes.size(); i++) { + lens[i] = sizes.get(i); + } + return new TextSet(symbols, lens); + } + + } + + public static Builder builder() { + return new Builder(); + } - public TextSet(int size) { - super(100); // FIXME + private int[] lens; - lens = new int[size]; + private TextSet(List symbols, int[] lens) { + super(symbols); + this.lens = lens; } - public TextSet(Text... text) { - this(text.length); - for (int i = 0; i < text.length; i++) { - symbols.addAll(text[i].sequence(0, text[i].length())); - symbols.add(new Terminator(i)); - lens[i] = symbols.size(); + public int[] getLens() { + return lens; + } + + @Override + public Object symbolAt(int index) { + Object obj = super.symbolAt(index); + if (obj instanceof Block) { + return ((Block) obj).getBlockHash(); } + return obj; + } + + public Block getBlock(int index) { + return (Block) super.symbolAt(index); } public static class Terminator { diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java index 572811ef3df..d094196e528 100644 --- a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java +++ b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java @@ -112,6 +112,52 @@ public class SuffixTreeCloneDetectionAlgorithmTest extends DetectorTestCase { newClonePart("x", 14, 2))); } + /** + * Given: + *
+   * a: 1 2 3 4
+   * b: 4 3 2
+   * c: 4 3 1
+   * 
+ * Expected: + *
+   * a-c (1)
+   * a-b (2)
+   * a-b-c (3)
+   * a-b-c (4)
+   * 
+   */
+  @Test
+  public void myTest3() {
+    CloneIndex index = createIndex(
+        newBlocks("b", "4 3 2"),
+        newBlocks("c", "4 3 1")
+        );
+    List fileBlocks = newBlocks("a", "1 2 3 4");
+    List result = detect(index, fileBlocks);
+
+    print(result);
+    assertEquals(4, result.size());
+
+    assertThat(result, hasCloneGroup(1,
+        newClonePart("a", 0, 1),
+        newClonePart("c", 2, 1)));
+
+    assertThat(result, hasCloneGroup(1,
+        newClonePart("a", 1, 1),
+        newClonePart("b", 2, 1)));
+
+    assertThat(result, hasCloneGroup(1,
+        newClonePart("a", 2, 1),
+        newClonePart("b", 1, 1),
+        newClonePart("c", 1, 1)));
+
+    assertThat(result, hasCloneGroup(1,
+        newClonePart("a", 3, 1),
+        newClonePart("b", 0, 1),
+        newClonePart("c", 0, 1)));
+  }
+
   @Override
   protected List detect(CloneIndex index, List fileBlocks) {
     return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks);
-- 
2.39.5