source.dussan.org Git - sonarqube.git/blob

   1 /*
   2  * Sonar, open source software quality management tool.
   3  * Copyright (C) 2008-2012 SonarSource
   4  * mailto:contact AT sonarsource DOT com
   5  *
   6  * Sonar is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 3 of the License, or (at your option) any later version.
  10  *
  11  * Sonar is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with Sonar; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
  19  */
  20 package org.sonar.duplications.detector.suffixtree;
  21
  22 import java.util.*;
  23
  24 import org.sonar.duplications.block.Block;
  25 import org.sonar.duplications.block.ByteArray;
  26 import org.sonar.duplications.index.CloneGroup;
  27 import org.sonar.duplications.index.CloneIndex;
  28
  29 import com.google.common.collect.*;
  30
  31 public final class SuffixTreeCloneDetectionAlgorithm {
  32
  33   public static List<CloneGroup> detect(CloneIndex cloneIndex, Collection<Block> fileBlocks) {
  34     if (fileBlocks.isEmpty()) {
  35       return Collections.EMPTY_LIST;
  36     }
  37     TextSet text = createTextSet(cloneIndex, fileBlocks);
  38     if (text == null) {
  39       return Collections.EMPTY_LIST;
  40     }
  41     DuplicationsCollector reporter = new DuplicationsCollector(text);
  42     Search.perform(text, reporter);
  43     return reporter.getResult();
  44   }
  45
  46   private SuffixTreeCloneDetectionAlgorithm() {
  47   }
  48
  49   private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) {
  50     Set<ByteArray> hashes = Sets.newHashSet();
  51     for (Block fileBlock : fileBlocks) {
  52       hashes.add(fileBlock.getBlockHash());
  53     }
  54
  55     String originResourceId = fileBlocks.iterator().next().getResourceId();
  56     Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes);
  57
  58     if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications
  59       return null;
  60     }
  61
  62     return createTextSet(fileBlocks, fromIndex);
  63   }
  64
  65   private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) {
  66     TextSet.Builder textSetBuilder = TextSet.builder();
  67     // TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks
  68     List<Block> sortedFileBlocks = Lists.newArrayList(fileBlocks);
  69     Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR);
  70     textSetBuilder.add(sortedFileBlocks);
  71
  72     for (List<Block> list : fromIndex.values()) {
  73       Collections.sort(list, BLOCK_COMPARATOR);
  74
  75       int i = 0;
  76       while (i < list.size()) {
  77         int j = i + 1;
  78         while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) {
  79           j++;
  80         }
  81         textSetBuilder.add(list.subList(i, j));
  82         i = j;
  83       }
  84     }
  85
  86     return textSetBuilder.build();
  87   }
  88
  89   private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) {
  90     Map<String, List<Block>> collection = Maps.newHashMap();
  91     for (ByteArray hash : hashes) {
  92       Collection<Block> blocks = index.getBySequenceHash(hash);
  93       for (Block blockFromIndex : blocks) {
  94         // Godin: skip blocks for this file if they come from index
  95         String resourceId = blockFromIndex.getResourceId();
  96         if (!originResourceId.equals(resourceId)) {
  97           List<Block> list = collection.get(resourceId);
  98           if (list == null) {
  99             list = Lists.newArrayList();
 100             collection.put(resourceId, list);
 101           }
 102           list.add(blockFromIndex);
 103         }
 104       }
 105     }
 106     return collection;
 107   }
 108
 109   private static final Comparator<Block> BLOCK_COMPARATOR = new Comparator<Block>() {
 110     public int compare(Block o1, Block o2) {
 111       return o1.getIndexInFile() - o2.getIndexInFile();
 112     }
 113   };
 114
 115 }