@@ -30,6 +30,10 @@ public abstract class AbstractText implements Text { | |||
this.symbols = new ArrayList<Object>(size); | |||
} | |||
/**
 * Creates a text over the given list of symbols.
 * <p>
 * NOTE(review): the list is stored by reference, not copied — a caller that keeps
 * mutating it after construction mutates this text too; confirm this aliasing is
 * intended before adding a defensive copy.
 *
 * @param symbols symbols of this text, stored as-is (not copied)
 */
public AbstractText(List<Object> symbols) {
  this.symbols = symbols;
}
/**
 * @return number of symbols in this text
 */
public int length() {
  return symbols.size();
}
@@ -37,7 +37,7 @@ import com.google.common.collect.Lists; | |||
*/ | |||
public class DuplicationsCollector implements Search.Collector { | |||
private final GeneralisedHashText text; | |||
private final TextSet text; | |||
private final String originResourceId; | |||
/** | |||
@@ -51,7 +51,7 @@ public class DuplicationsCollector implements Search.Collector { | |||
private ClonePart origin; | |||
private final List<ClonePart> parts = Lists.newArrayList(); | |||
public DuplicationsCollector(GeneralisedHashText text) { | |||
public DuplicationsCollector(TextSet text) { | |||
this.text = text; | |||
this.originResourceId = text.getBlock(0).getResourceId(); | |||
} | |||
@@ -162,9 +162,9 @@ public class DuplicationsCollector implements Search.Collector { | |||
// if (!first.getOriginPart().getResourceId().equals(second.getOriginPart().getResourceId())) { | |||
// return false; | |||
// } | |||
if (first.getCloneUnitLength() > second.getCloneUnitLength()) { | |||
return false; | |||
} | |||
// if (first.getCloneUnitLength() > second.getCloneUnitLength()) { | |||
// return false; | |||
// } | |||
List<ClonePart> firstParts = first.getCloneParts(); | |||
List<ClonePart> secondParts = second.getCloneParts(); | |||
return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(first.getCloneUnitLength(), second.getCloneUnitLength())) |
@@ -1,77 +0,0 @@ | |||
/* | |||
* Sonar, open source software quality management tool. | |||
* Copyright (C) 2008-2011 SonarSource | |||
* mailto:contact AT sonarsource DOT com | |||
* | |||
* Sonar is free software; you can redistribute it and/or | |||
* modify it under the terms of the GNU Lesser General Public | |||
* License as published by the Free Software Foundation; either | |||
* version 3 of the License, or (at your option) any later version. | |||
* | |||
* Sonar is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
* Lesser General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU Lesser General Public | |||
* License along with Sonar; if not, write to the Free Software | |||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/ | |||
package org.sonar.duplications.detector.suffixtree; | |||
import java.util.List; | |||
import org.sonar.duplications.block.Block; | |||
import com.google.common.collect.Lists; | |||
public class GeneralisedHashText extends TextSet { | |||
public GeneralisedHashText(List<Block>... blocks) { | |||
super(blocks.length); | |||
for (int i = 0; i < blocks.length; i++) { | |||
addAll(blocks[i]); | |||
addTerminator(); | |||
} | |||
finish(); | |||
} | |||
private int count; | |||
private List<Integer> sizes = Lists.newArrayList(); | |||
public void addBlock(Block block) { | |||
symbols.add(block); | |||
} | |||
public void addAll(List<Block> list) { | |||
symbols.addAll(list); | |||
} | |||
public void addTerminator() { | |||
symbols.add(new Terminator(count)); | |||
sizes.add(symbols.size()); | |||
count++; | |||
} | |||
public void finish() { | |||
super.lens = new int[sizes.size()]; | |||
for (int i = 0; i < sizes.size(); i++) { | |||
super.lens[i] = sizes.get(i); | |||
} | |||
} | |||
@Override | |||
public Object symbolAt(int index) { | |||
Object obj = super.symbolAt(index); | |||
if (obj instanceof Block) { | |||
return ((Block) obj).getBlockHash(); | |||
} | |||
return obj; | |||
} | |||
public Block getBlock(int index) { | |||
return (Block) super.symbolAt(index); | |||
} | |||
} |
@@ -31,7 +31,7 @@ public final class Search { | |||
private final List<Node> innerNodes = new ArrayList<Node>(); | |||
public static void perform(TextSet text, Collector reporter) { | |||
new Search(SuffixTree.create(text), text.lens, reporter).compute(); | |||
new Search(SuffixTree.create(text), text.getLens(), reporter).compute(); | |||
} | |||
private Search(SuffixTree tree, int[] lens, Collector reporter) { |
@@ -25,9 +25,8 @@ import org.sonar.duplications.block.Block; | |||
import org.sonar.duplications.block.ByteArray; | |||
import org.sonar.duplications.index.CloneGroup; | |||
import org.sonar.duplications.index.CloneIndex; | |||
import com.google.common.collect.Lists; | |||
import com.google.common.collect.Maps; | |||
import com.google.common.collect.Sets; | |||
import com.google.common.collect.*; | |||
public final class SuffixTreeCloneDetectionAlgorithm { | |||
@@ -35,7 +34,7 @@ public final class SuffixTreeCloneDetectionAlgorithm { | |||
if (fileBlocks.isEmpty()) { | |||
return Collections.EMPTY_LIST; | |||
} | |||
GeneralisedHashText text = retrieveFromIndex(cloneIndex, fileBlocks); | |||
TextSet text = createTextSet(cloneIndex, fileBlocks); | |||
if (text == null) { | |||
return Collections.EMPTY_LIST; | |||
} | |||
@@ -47,43 +46,30 @@ public final class SuffixTreeCloneDetectionAlgorithm { | |||
private SuffixTreeCloneDetectionAlgorithm() { | |||
} | |||
private static GeneralisedHashText retrieveFromIndex(CloneIndex index, Collection<Block> fileBlocks) { | |||
String originResourceId = fileBlocks.iterator().next().getResourceId(); | |||
private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) { | |||
Set<ByteArray> hashes = Sets.newHashSet(); | |||
for (Block fileBlock : fileBlocks) { | |||
hashes.add(fileBlock.getBlockHash()); | |||
} | |||
Map<String, List<Block>> collection = Maps.newHashMap(); | |||
for (ByteArray hash : hashes) { | |||
Collection<Block> blocks = index.getBySequenceHash(hash); | |||
for (Block blockFromIndex : blocks) { | |||
// Godin: skip blocks for this file if they come from index | |||
String resourceId = blockFromIndex.getResourceId(); | |||
if (!originResourceId.equals(resourceId)) { | |||
List<Block> list = collection.get(resourceId); | |||
if (list == null) { | |||
list = Lists.newArrayList(); | |||
collection.put(resourceId, list); | |||
} | |||
list.add(blockFromIndex); | |||
} | |||
} | |||
} | |||
String originResourceId = fileBlocks.iterator().next().getResourceId(); | |||
Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes); | |||
if (collection.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications | |||
if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications | |||
return null; | |||
} | |||
GeneralisedHashText text = new GeneralisedHashText(); | |||
return createTextSet(fileBlocks, fromIndex); | |||
} | |||
private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) { | |||
TextSet.Builder textSetBuilder = TextSet.builder(); | |||
// TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks | |||
List<Block> sortedFileBlocks = Lists.newArrayList(fileBlocks); | |||
Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR); | |||
text.addAll(sortedFileBlocks); | |||
text.addTerminator(); | |||
textSetBuilder.add(sortedFileBlocks); | |||
for (List<Block> list : collection.values()) { | |||
for (List<Block> list : fromIndex.values()) { | |||
Collections.sort(list, BLOCK_COMPARATOR); | |||
int i = 0; | |||
@@ -92,15 +78,32 @@ public final class SuffixTreeCloneDetectionAlgorithm { | |||
while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) { | |||
j++; | |||
} | |||
text.addAll(list.subList(i, j)); | |||
text.addTerminator(); | |||
textSetBuilder.add(list.subList(i, j)); | |||
i = j; | |||
} | |||
} | |||
text.finish(); | |||
return textSetBuilder.build(); | |||
} | |||
return text; | |||
/**
 * Queries the clone index for all blocks whose hash occurs in the file under
 * analysis, grouped by the resource they belong to.
 *
 * @param index clone index to query
 * @param originResourceId resource id of the file under analysis; blocks from this
 *        resource are skipped, since the index may already contain the file itself
 * @param hashes distinct block hashes of the file under analysis
 * @return blocks from other resources, grouped by resource id (lists unsorted)
 */
private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) {
  Map<String, List<Block>> collection = Maps.newHashMap();
  for (ByteArray hash : hashes) {
    Collection<Block> blocks = index.getBySequenceHash(hash);
    for (Block blockFromIndex : blocks) {
      // Godin: skip blocks for this file if they come from index
      String resourceId = blockFromIndex.getResourceId();
      if (!originResourceId.equals(resourceId)) {
        // lazily create the per-resource list on first hit
        List<Block> list = collection.get(resourceId);
        if (list == null) {
          list = Lists.newArrayList();
          collection.put(resourceId, list);
        }
        list.add(blockFromIndex);
      }
    }
  }
  return collection;
}
private static final Comparator<Block> BLOCK_COMPARATOR = new Comparator<Block>() { |
@@ -19,26 +19,67 @@ | |||
*/ | |||
package org.sonar.duplications.detector.suffixtree; | |||
import java.util.List; | |||
import org.sonar.duplications.block.Block; | |||
import com.google.common.collect.Lists; | |||
/** | |||
* Simplifies construction of <a href="http://en.wikipedia.org/wiki/Generalised_suffix_tree">generalised suffix-tree</a>. | |||
*/ | |||
public class TextSet extends AbstractText { | |||
int[] lens; | |||
/**
 * Builds a {@link TextSet} incrementally: each call to {@link #add(List)} appends one
 * sequence of blocks followed by a {@link Terminator} unique to that sequence, as
 * required for a generalised suffix-tree.
 * <p>
 * NOTE(review): {@link #build()} hands the accumulated symbol list to the TextSet by
 * reference — confirm builders are single-use and not touched after build().
 */
public static class Builder {

  // blocks of all sequences added so far, interleaved with their terminators
  private List<Object> symbols = Lists.newArrayList();

  // end offset (exclusive) of each terminated sequence within symbols
  private List<Integer> sizes = Lists.newArrayList();

  // instances are obtained via TextSet.builder()
  private Builder() {
  }

  /**
   * Appends one sequence of blocks, terminated by a terminator unique to it.
   */
  public void add(List<Block> list) {
    symbols.addAll(list);
    // sizes.size() is the ordinal of this sequence, which makes each terminator unique
    symbols.add(new Terminator(sizes.size()));
    sizes.add(symbols.size());
  }

  /**
   * @return a {@link TextSet} over everything added so far
   */
  public TextSet build() {
    int[] lens = new int[sizes.size()];
    for (int i = 0; i < sizes.size(); i++) {
      lens[i] = sizes.get(i);
    }
    return new TextSet(symbols, lens);
  }
}
/**
 * @return a new, empty {@link Builder}
 */
public static Builder builder() {
  return new Builder();
}
public TextSet(int size) { | |||
super(100); // FIXME | |||
private int[] lens; | |||
lens = new int[size]; | |||
private TextSet(List<Object> symbols, int[] lens) { | |||
super(symbols); | |||
this.lens = lens; | |||
} | |||
public TextSet(Text... text) { | |||
this(text.length); | |||
for (int i = 0; i < text.length; i++) { | |||
symbols.addAll(text[i].sequence(0, text[i].length())); | |||
symbols.add(new Terminator(i)); | |||
lens[i] = symbols.size(); | |||
/**
 * @return end offset (exclusive) of each terminated sequence within this text
 *         <p>
 *         NOTE(review): returns the internal array by reference, so callers could
 *         mutate it — consider returning a copy if that matters.
 */
public int[] getLens() {
  return lens;
}
@Override | |||
public Object symbolAt(int index) { | |||
Object obj = super.symbolAt(index); | |||
if (obj instanceof Block) { | |||
return ((Block) obj).getBlockHash(); | |||
} | |||
return obj; | |||
} | |||
/**
 * Returns the underlying {@link Block} at the given position; fails with a
 * {@link ClassCastException} when the symbol there is a terminator.
 */
public Block getBlock(int index) {
  return (Block) super.symbolAt(index);
}
public static class Terminator { |
@@ -112,6 +112,52 @@ public class SuffixTreeCloneDetectionAlgorithmTest extends DetectorTestCase { | |||
newClonePart("x", 14, 2))); | |||
} | |||
/** | |||
* Given: | |||
* <pre> | |||
* a: 1 2 3 4 | |||
* b: 4 3 2 | |||
* c: 4 3 1 | |||
* </pre> | |||
* Expected: | |||
* <pre> | |||
* a-c (1) | |||
* a-b (2) | |||
* a-b-c (3) | |||
* a-b-c (4) | |||
 * </pre>
*/ | |||
@Test
public void myTest3() {
  // index already contains two other files, each sharing blocks with file "a"
  CloneIndex index = createIndex(
      newBlocks("b", "4 3 2"),
      newBlocks("c", "4 3 1")
  );
  List<Block> fileBlocks = newBlocks("a", "1 2 3 4");
  List<CloneGroup> result = detect(index, fileBlocks);
  print(result);
  // one clone group per shared block hash (see Javadoc above): 1, 2, 3, 4
  assertEquals(4, result.size());
  // block "1" occurs in "a" and "c" only
  assertThat(result, hasCloneGroup(1,
      newClonePart("a", 0, 1),
      newClonePart("c", 2, 1)));
  // block "2" occurs in "a" and "b" only
  assertThat(result, hasCloneGroup(1,
      newClonePart("a", 1, 1),
      newClonePart("b", 2, 1)));
  // block "3" occurs in all three files
  assertThat(result, hasCloneGroup(1,
      newClonePart("a", 2, 1),
      newClonePart("b", 1, 1),
      newClonePart("c", 1, 1)));
  // block "4" occurs in all three files
  assertThat(result, hasCloneGroup(1,
      newClonePart("a", 3, 1),
      newClonePart("b", 0, 1),
      newClonePart("c", 0, 1)));
}
@Override | |||
protected List<CloneGroup> detect(CloneIndex index, List<Block> fileBlocks) { | |||
return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks); |