/*
 * Sonar, open source software quality management tool.
 * Copyright (C) 2008-2012 SonarSource
 * mailto:contact AT sonarsource DOT com
 *
 * Sonar is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * Sonar is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Sonar; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.duplications.detector.suffixtree;

import java.util.*;

import org.sonar.duplications.block.Block;
import org.sonar.duplications.block.ByteArray;
import org.sonar.duplications.index.CloneGroup;
import org.sonar.duplications.index.CloneIndex;

import com.google.common.collect.*;
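
/**
 * Implements clone detection on top of a generalized suffix tree: the blocks of the analyzed file, together with
 * matching blocks retrieved from a {@link CloneIndex}, are assembled into a {@link TextSet} over which
 * {@link Search} reports duplicated sequences to a {@link DuplicationsCollector}.
 *
 * <p>A minimal usage sketch, assuming {@code index} is an already populated {@link CloneIndex} and {@code blocks}
 * are the blocks of the file under analysis:
 * <pre>{@code
 *   List<CloneGroup> duplications = SuffixTreeCloneDetectionAlgorithm.detect(index, blocks);
 * }</pre>
 */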
public final class SuffixTreeCloneDetectionAlgorithm {
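
  /**
   * Finds clone groups for the given file blocks. Returns an empty list when the file has no blocks, or when
   * none of its blocks occur in the index under another resource and the file does not repeat any of its own blocks.
   */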
  public static List<CloneGroup> detect(CloneIndex cloneIndex, Collection<Block> fileBlocks) {
    if (fileBlocks.isEmpty()) {
      return Collections.EMPTY_LIST;
    }
    TextSet text = createTextSet(cloneIndex, fileBlocks);
    if (text == null) {
      return Collections.EMPTY_LIST;
    }
    DuplicationsCollector reporter = new DuplicationsCollector(text);
    Search.perform(text, reporter);
    return reporter.getResult();
  }

  private SuffixTreeCloneDetectionAlgorithm() {
  }
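
  /**
   * Builds the {@link TextSet} for the given file; returns {@code null} when duplication is impossible, i.e. the
   * index contains no matching blocks from other resources and the file does not repeat any of its own blocks.
   */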
  private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) {
    Set<ByteArray> hashes = Sets.newHashSet();
    for (Block fileBlock : fileBlocks) {
      hashes.add(fileBlock.getBlockHash());
    }

    String originResourceId = fileBlocks.iterator().next().getResourceId();
    Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes);

    if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there are no duplications
      return null;
    }

    return createTextSet(fileBlocks, fromIndex);
  }
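
  /**
   * The file's own blocks, sorted by position, form the first text; blocks coming from every other resource are
   * split into runs of consecutive blocks (by {@code getIndexInFile()}), and each run is added as a separate text.
   */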
  private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) {
    TextSet.Builder textSetBuilder = TextSet.builder();
    // TODO Godin: maybe we can reduce the size of the tree, and thus memory consumption, by removing non-repeatable blocks
    List<Block> sortedFileBlocks = Lists.newArrayList(fileBlocks);
    Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR);
    textSetBuilder.add(sortedFileBlocks);

    for (List<Block> list : fromIndex.values()) {
      Collections.sort(list, BLOCK_COMPARATOR);
      // group consecutive blocks of each resource into a separate text
      int i = 0;
      while (i < list.size()) {
        int j = i + 1;
        while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) {
          j++;
        }
        textSetBuilder.add(list.subList(i, j));
        i = j;
      }
    }

    return textSetBuilder.build();
  }
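
  /**
   * Queries the index for every block hash of the analyzed file and groups the returned blocks by resource id,
   * ignoring blocks that belong to the analyzed file itself.
   */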
  private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) {
    Map<String, List<Block>> collection = Maps.newHashMap();
    for (ByteArray hash : hashes) {
      Collection<Block> blocks = index.getBySequenceHash(hash);
      for (Block blockFromIndex : blocks) {
        // Godin: skip blocks for this file if they come from index
        String resourceId = blockFromIndex.getResourceId();
        if (!originResourceId.equals(resourceId)) {
          List<Block> list = collection.get(resourceId);
          if (list == null) {
            list = Lists.newArrayList();
            collection.put(resourceId, list);
          }
          list.add(blockFromIndex);
        }
      }
    }
    return collection;
  }
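
  /**
   * Orders blocks by their position in the file, so that runs of consecutive blocks can be detected.
   */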
  private static final Comparator<Block> BLOCK_COMPARATOR = new Comparator<Block>() {
    public int compare(Block o1, Block o2) {
      return o1.getIndexInFile() - o2.getIndexInFile();
    }
  };
}