diff options
Diffstat (limited to 'sonar-duplications')
7 files changed, 142 insertions, 125 deletions
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java index 5a3ffa0c14c..b0d57551bf8 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/AbstractText.java @@ -30,6 +30,10 @@ public abstract class AbstractText implements Text { this.symbols = new ArrayList<Object>(size); } + public AbstractText(List<Object> symbols) { + this.symbols = symbols; + } + public int length() { return symbols.size(); } diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java index 745fb90a3b3..c6965648bb3 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java @@ -37,7 +37,7 @@ import com.google.common.collect.Lists; */ public class DuplicationsCollector implements Search.Collector { - private final GeneralisedHashText text; + private final TextSet text; private final String originResourceId; /** @@ -51,7 +51,7 @@ public class DuplicationsCollector implements Search.Collector { private ClonePart origin; private final List<ClonePart> parts = Lists.newArrayList(); - public DuplicationsCollector(GeneralisedHashText text) { + public DuplicationsCollector(TextSet text) { this.text = text; this.originResourceId = text.getBlock(0).getResourceId(); } @@ -162,9 +162,9 @@ public class DuplicationsCollector implements Search.Collector { // if (!first.getOriginPart().getResourceId().equals(second.getOriginPart().getResourceId())) { // return false; // } - if (first.getCloneUnitLength() > second.getCloneUnitLength()) { - return false; - } + // if (first.getCloneUnitLength() > second.getCloneUnitLength()) { + // return false; + // } List<ClonePart> firstParts = first.getCloneParts(); List<ClonePart> secondParts = second.getCloneParts(); return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(first.getCloneUnitLength(), second.getCloneUnitLength())) diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java deleted file mode 100644 index 202cb85084c..00000000000 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/GeneralisedHashText.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Sonar, open source software quality management tool. - * Copyright (C) 2008-2011 SonarSource - * mailto:contact AT sonarsource DOT com - * - * Sonar is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * Sonar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Sonar; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 - */ -package org.sonar.duplications.detector.suffixtree; - -import java.util.List; - -import org.sonar.duplications.block.Block; - -import com.google.common.collect.Lists; - -public class GeneralisedHashText extends TextSet { - - public GeneralisedHashText(List<Block>... blocks) { - super(blocks.length); - - for (int i = 0; i < blocks.length; i++) { - addAll(blocks[i]); - addTerminator(); - } - finish(); - } - - private int count; - private List<Integer> sizes = Lists.newArrayList(); - - public void addBlock(Block block) { - symbols.add(block); - } - - public void addAll(List<Block> list) { - symbols.addAll(list); - } - - public void addTerminator() { - symbols.add(new Terminator(count)); - sizes.add(symbols.size()); - count++; - } - - public void finish() { - super.lens = new int[sizes.size()]; - for (int i = 0; i < sizes.size(); i++) { - super.lens[i] = sizes.get(i); - } - } - - @Override - public Object symbolAt(int index) { - Object obj = super.symbolAt(index); - if (obj instanceof Block) { - return ((Block) obj).getBlockHash(); - } - return obj; - } - - public Block getBlock(int index) { - return (Block) super.symbolAt(index); - } - -} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java index f13410e90d4..80d714339d4 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/Search.java @@ -31,7 +31,7 @@ public final class Search { private final List<Node> innerNodes = new ArrayList<Node>(); public static void perform(TextSet text, Collector reporter) { - new Search(SuffixTree.create(text), text.lens, reporter).compute(); + new Search(SuffixTree.create(text), text.getLens(), reporter).compute(); } private Search(SuffixTree tree, int[] lens, Collector reporter) { diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java index 004074cfa48..0952476569d 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithm.java @@ -25,9 +25,8 @@ import org.sonar.duplications.block.Block; import org.sonar.duplications.block.ByteArray; import org.sonar.duplications.index.CloneGroup; import org.sonar.duplications.index.CloneIndex; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; + +import com.google.common.collect.*; public final class SuffixTreeCloneDetectionAlgorithm { @@ -35,7 +34,7 @@ public final class SuffixTreeCloneDetectionAlgorithm { if (fileBlocks.isEmpty()) { return Collections.EMPTY_LIST; } - GeneralisedHashText text = retrieveFromIndex(cloneIndex, fileBlocks); + TextSet text = createTextSet(cloneIndex, fileBlocks); if (text == null) { return Collections.EMPTY_LIST; } @@ -47,43 +46,30 @@ public final class SuffixTreeCloneDetectionAlgorithm { private SuffixTreeCloneDetectionAlgorithm() { } - private static GeneralisedHashText retrieveFromIndex(CloneIndex index, Collection<Block> fileBlocks) { - String originResourceId = fileBlocks.iterator().next().getResourceId(); - + private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) { Set<ByteArray> hashes = Sets.newHashSet(); for (Block fileBlock : fileBlocks) { hashes.add(fileBlock.getBlockHash()); } - Map<String, List<Block>> collection = Maps.newHashMap(); - for (ByteArray hash : hashes) { - Collection<Block> blocks = index.getBySequenceHash(hash); - for (Block blockFromIndex : blocks) { - // Godin: skip blocks for this file if they come from index - String resourceId = blockFromIndex.getResourceId(); - if (!originResourceId.equals(resourceId)) { - List<Block> list = collection.get(resourceId); - if (list == null) { - list = Lists.newArrayList(); - collection.put(resourceId, list); - } - list.add(blockFromIndex); - } - } - } + String originResourceId = fileBlocks.iterator().next().getResourceId(); + Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes); - if (collection.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications + if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications return null; } - GeneralisedHashText text = new GeneralisedHashText(); + return createTextSet(fileBlocks, fromIndex); + } + + private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) { + TextSet.Builder textSetBuilder = TextSet.builder(); // TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks List<Block> sortedFileBlocks = Lists.newArrayList(fileBlocks); Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR); - text.addAll(sortedFileBlocks); - text.addTerminator(); + textSetBuilder.add(sortedFileBlocks); - for (List<Block> list : collection.values()) { + for (List<Block> list : fromIndex.values()) { Collections.sort(list, BLOCK_COMPARATOR); int i = 0; @@ -92,15 +78,32 @@ public final class SuffixTreeCloneDetectionAlgorithm { while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) { j++; } - text.addAll(list.subList(i, j)); - text.addTerminator(); + textSetBuilder.add(list.subList(i, j)); i = j; } } - text.finish(); + return textSetBuilder.build(); + } - return text; + private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) { + Map<String, List<Block>> collection = Maps.newHashMap(); + for (ByteArray hash : hashes) { + Collection<Block> blocks = index.getBySequenceHash(hash); + for (Block blockFromIndex : blocks) { + // Godin: skip blocks for this file if they come from index + String resourceId = blockFromIndex.getResourceId(); + if (!originResourceId.equals(resourceId)) { + List<Block> list = collection.get(resourceId); + if (list == null) { + list = Lists.newArrayList(); + collection.put(resourceId, list); + } + list.add(blockFromIndex); + } + } + } + return collection; } private static final Comparator<Block> BLOCK_COMPARATOR = new Comparator<Block>() { diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java index aa3628ed048..4daccb1776b 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/TextSet.java @@ -19,26 +19,67 @@ */ package org.sonar.duplications.detector.suffixtree; +import java.util.List; + +import org.sonar.duplications.block.Block; + +import com.google.common.collect.Lists; + /** * Simplifies construction of <a href="http://en.wikipedia.org/wiki/Generalised_suffix_tree">generalised suffix-tree</a>. */ public class TextSet extends AbstractText { - int[] lens; + public static class Builder { + + private List<Object> symbols = Lists.newArrayList(); + private List<Integer> sizes = Lists.newArrayList(); + + private Builder() { + } + + public void add(List<Block> list) { + symbols.addAll(list); + symbols.add(new Terminator(sizes.size())); + sizes.add(symbols.size()); + } + + public TextSet build() { + int[] lens = new int[sizes.size()]; + for (int i = 0; i < sizes.size(); i++) { + lens[i] = sizes.get(i); + } + return new TextSet(symbols, lens); + } + + } + + public static Builder builder() { + return new Builder(); + } - public TextSet(int size) { - super(100); // FIXME + private int[] lens; - lens = new int[size]; + private TextSet(List<Object> symbols, int[] lens) { + super(symbols); + this.lens = lens; } - public TextSet(Text... text) { - this(text.length); - for (int i = 0; i < text.length; i++) { - symbols.addAll(text[i].sequence(0, text[i].length())); - symbols.add(new Terminator(i)); - lens[i] = symbols.size(); + public int[] getLens() { + return lens; + } + + @Override + public Object symbolAt(int index) { + Object obj = super.symbolAt(index); + if (obj instanceof Block) { + return ((Block) obj).getBlockHash(); } + return obj; + } + + public Block getBlock(int index) { + return (Block) super.symbolAt(index); } public static class Terminator { diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java index 572811ef3df..d094196e528 100644 --- a/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java +++ b/sonar-duplications/src/test/java/org/sonar/duplications/detector/suffixtree/SuffixTreeCloneDetectionAlgorithmTest.java @@ -112,6 +112,52 @@ public class SuffixTreeCloneDetectionAlgorithmTest extends DetectorTestCase { newClonePart("x", 14, 2))); } + /** + * Given: + * <pre> + * a: 1 2 3 4 + * b: 4 3 2 + * c: 4 3 1 + * </pre> + * Expected: + * <pre> + * a-c (1) + * a-b (2) + * a-b-c (3) + * a-b-c (4) + * <pre> + */ + @Test + public void myTest3() { + CloneIndex index = createIndex( + newBlocks("b", "4 3 2"), + newBlocks("c", "4 3 1") + ); + List<Block> fileBlocks = newBlocks("a", "1 2 3 4"); + List<CloneGroup> result = detect(index, fileBlocks); + + print(result); + assertEquals(4, result.size()); + + assertThat(result, hasCloneGroup(1, + newClonePart("a", 0, 1), + newClonePart("c", 2, 1))); + + assertThat(result, hasCloneGroup(1, + newClonePart("a", 1, 1), + newClonePart("b", 2, 1))); + + assertThat(result, hasCloneGroup(1, + newClonePart("a", 2, 1), + newClonePart("b", 1, 1), + newClonePart("c", 1, 1))); + + assertThat(result, hasCloneGroup(1, + newClonePart("a", 3, 1), + newClonePart("b", 0, 1), + newClonePart("c", 0, 1))); + } + @Override protected List<CloneGroup> detect(CloneIndex index, List<Block> fileBlocks) { return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks); |