You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

CpdExecutor.java 9.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. /*
  2. * SonarQube
  3. * Copyright (C) 2009-2022 SonarSource SA
  4. * mailto:info AT sonarsource DOT com
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 3 of the License, or (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public License
  17. * along with this program; if not, write to the Free Software Foundation,
  18. * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19. */
  20. package org.sonar.scanner.cpd;
  21. import java.util.ArrayList;
  22. import java.util.Collection;
  23. import java.util.Iterator;
  24. import java.util.List;
  25. import java.util.Optional;
  26. import java.util.concurrent.ExecutorService;
  27. import java.util.concurrent.Executors;
  28. import java.util.concurrent.Future;
  29. import java.util.concurrent.TimeUnit;
  30. import java.util.concurrent.TimeoutException;
  31. import java.util.function.Function;
  32. import java.util.function.Predicate;
  33. import java.util.stream.Collectors;
  34. import javax.inject.Inject;
  35. import org.sonar.api.batch.fs.InputComponent;
  36. import org.sonar.api.batch.fs.internal.DefaultInputComponent;
  37. import org.sonar.api.batch.fs.internal.DefaultInputFile;
  38. import org.sonar.api.utils.log.Logger;
  39. import org.sonar.api.utils.log.Loggers;
  40. import org.sonar.duplications.block.Block;
  41. import org.sonar.duplications.detector.suffixtree.SuffixTreeCloneDetectionAlgorithm;
  42. import org.sonar.duplications.index.CloneGroup;
  43. import org.sonar.duplications.index.ClonePart;
  44. import org.sonar.duplications.index.PackedMemoryCloneIndex.ResourceBlocks;
  45. import org.sonar.scanner.cpd.index.SonarCpdBlockIndex;
  46. import org.sonar.scanner.protocol.output.ScannerReport;
  47. import org.sonar.scanner.protocol.output.ScannerReport.Duplicate;
  48. import org.sonar.scanner.protocol.output.ScannerReport.Duplication;
  49. import org.sonar.scanner.report.ReportPublisher;
  50. import org.sonar.scanner.scan.filesystem.InputComponentStore;
  51. import org.sonar.scanner.util.ProgressReport;
  52. /**
  53. * Runs on the root module, at the end of the project analysis.
  54. * It executes copy paste detection involving all files of all modules, which were indexed during sensors execution for each module.
  55. * The sensors are responsible for handling exclusions and block sizes.
  56. */
  57. public class CpdExecutor {
  58. private static final Logger LOG = Loggers.get(CpdExecutor.class);
  59. // timeout for the computation of duplicates in a file (seconds)
  60. private static final int TIMEOUT = 5 * 60 * 1000;
  61. static final int MAX_CLONE_GROUP_PER_FILE = 100;
  62. static final int MAX_CLONE_PART_PER_GROUP = 100;
  63. private final SonarCpdBlockIndex index;
  64. private final ReportPublisher publisher;
  65. private final InputComponentStore componentStore;
  66. private final ProgressReport progressReport;
  67. private final CpdSettings settings;
  68. private final ExecutorService executorService;
  69. private int count = 0;
  70. private int total;
  71. @Inject
  72. public CpdExecutor(CpdSettings settings, SonarCpdBlockIndex index, ReportPublisher publisher, InputComponentStore inputComponentCache) {
  73. this(settings, index, publisher, inputComponentCache, Executors.newSingleThreadExecutor());
  74. }
  75. public CpdExecutor(CpdSettings settings, SonarCpdBlockIndex index, ReportPublisher publisher, InputComponentStore inputComponentCache, ExecutorService executorService) {
  76. this.settings = settings;
  77. this.index = index;
  78. this.publisher = publisher;
  79. this.componentStore = inputComponentCache;
  80. this.progressReport = new ProgressReport("CPD computation", TimeUnit.SECONDS.toMillis(10));
  81. this.executorService = executorService;
  82. }
  83. public void execute() {
  84. execute(TIMEOUT);
  85. }
  86. void execute(long timeout) {
  87. List<FileBlocks> components = new ArrayList<>(index.noResources());
  88. Iterator<ResourceBlocks> it = index.iterator();
  89. while (it.hasNext()) {
  90. ResourceBlocks resourceBlocks = it.next();
  91. Optional<FileBlocks> fileBlocks = toFileBlocks(resourceBlocks.resourceId(), resourceBlocks.blocks());
  92. if (!fileBlocks.isPresent()) {
  93. continue;
  94. }
  95. components.add(fileBlocks.get());
  96. }
  97. int filesWithoutBlocks = index.noIndexedFiles() - index.noResources();
  98. if (filesWithoutBlocks > 0) {
  99. LOG.info("CPD Executor {} {} had no CPD blocks", filesWithoutBlocks, pluralize(filesWithoutBlocks));
  100. }
  101. total = components.size();
  102. progressReport.start(String.format("CPD Executor Calculating CPD for %d %s", total, pluralize(total)));
  103. try {
  104. for (FileBlocks fileBlocks : components) {
  105. runCpdAnalysis(executorService, fileBlocks.getInputFile(), fileBlocks.getBlocks(), timeout);
  106. count++;
  107. }
  108. progressReport.stopAndLogTotalTime("CPD Executor CPD calculation finished");
  109. } catch (Exception e) {
  110. progressReport.stop("");
  111. throw e;
  112. } finally {
  113. executorService.shutdown();
  114. }
  115. }
  116. private static String pluralize(int files) {
  117. return files == 1 ? "file" : "files";
  118. }
  119. void runCpdAnalysis(ExecutorService executorService, DefaultInputFile inputFile, Collection<Block> fileBlocks, long timeout) {
  120. LOG.debug("Detection of duplications for {}", inputFile.absolutePath());
  121. progressReport.message(String.format("%d/%d - current file: %s", count, total, inputFile.absolutePath()));
  122. List<CloneGroup> duplications;
  123. Future<List<CloneGroup>> futureResult = executorService.submit(() -> SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks));
  124. try {
  125. duplications = futureResult.get(timeout, TimeUnit.MILLISECONDS);
  126. } catch (TimeoutException e) {
  127. LOG.warn("Timeout during detection of duplications for {}", inputFile.absolutePath());
  128. futureResult.cancel(true);
  129. return;
  130. } catch (Exception e) {
  131. throw new IllegalStateException("Fail during detection of duplication for " + inputFile.absolutePath(), e);
  132. }
  133. List<CloneGroup> filtered;
  134. if (!"java".equalsIgnoreCase(inputFile.language())) {
  135. int minTokens = settings.getMinimumTokens(inputFile.language());
  136. Predicate<CloneGroup> minimumTokensPredicate = DuplicationPredicates.numberOfUnitsNotLessThan(minTokens);
  137. filtered = duplications.stream()
  138. .filter(minimumTokensPredicate)
  139. .collect(Collectors.toList());
  140. } else {
  141. filtered = duplications;
  142. }
  143. saveDuplications(inputFile, filtered);
  144. }
  145. final void saveDuplications(final DefaultInputComponent component, List<CloneGroup> duplications) {
  146. if (duplications.size() > MAX_CLONE_GROUP_PER_FILE) {
  147. LOG.warn("Too many duplication groups on file {}. Keep only the first {} groups.", component, MAX_CLONE_GROUP_PER_FILE);
  148. }
  149. Iterable<ScannerReport.Duplication> reportDuplications = duplications.stream()
  150. .limit(MAX_CLONE_GROUP_PER_FILE)
  151. .map(
  152. new Function<CloneGroup, Duplication>() {
  153. private final ScannerReport.Duplication.Builder dupBuilder = ScannerReport.Duplication.newBuilder();
  154. private final ScannerReport.Duplicate.Builder blockBuilder = ScannerReport.Duplicate.newBuilder();
  155. @Override
  156. public ScannerReport.Duplication apply(CloneGroup input) {
  157. return toReportDuplication(component, dupBuilder, blockBuilder, input);
  158. }
  159. })::iterator;
  160. publisher.getWriter().writeComponentDuplications(component.scannerId(), reportDuplications);
  161. }
  162. private Optional<FileBlocks> toFileBlocks(String componentKey, Collection<Block> fileBlocks) {
  163. DefaultInputFile component = (DefaultInputFile) componentStore.getByKey(componentKey);
  164. if (component == null) {
  165. LOG.error("Resource not found in component store: {}. Skipping CPD computation for it", componentKey);
  166. return Optional.empty();
  167. }
  168. return Optional.of(new FileBlocks(component, fileBlocks));
  169. }
  170. private Duplication toReportDuplication(InputComponent component, Duplication.Builder dupBuilder, Duplicate.Builder blockBuilder, CloneGroup input) {
  171. dupBuilder.clear();
  172. ClonePart originBlock = input.getOriginPart();
  173. blockBuilder.clear();
  174. dupBuilder.setOriginPosition(ScannerReport.TextRange.newBuilder()
  175. .setStartLine(originBlock.getStartLine())
  176. .setEndLine(originBlock.getEndLine())
  177. .build());
  178. int clonePartCount = 0;
  179. for (ClonePart duplicate : input.getCloneParts()) {
  180. if (!duplicate.equals(originBlock)) {
  181. clonePartCount++;
  182. if (clonePartCount > MAX_CLONE_PART_PER_GROUP) {
  183. LOG.warn("Too many duplication references on file " + component + " for block at line " +
  184. originBlock.getStartLine() + ". Keep only the first "
  185. + MAX_CLONE_PART_PER_GROUP + " references.");
  186. break;
  187. }
  188. blockBuilder.clear();
  189. String componentKey = duplicate.getResourceId();
  190. if (!component.key().equals(componentKey)) {
  191. DefaultInputComponent sameProjectComponent = (DefaultInputComponent) componentStore.getByKey(componentKey);
  192. blockBuilder.setOtherFileRef(sameProjectComponent.scannerId());
  193. }
  194. dupBuilder.addDuplicate(blockBuilder
  195. .setRange(ScannerReport.TextRange.newBuilder()
  196. .setStartLine(duplicate.getStartLine())
  197. .setEndLine(duplicate.getEndLine())
  198. .build())
  199. .build());
  200. }
  201. }
  202. return dupBuilder.build();
  203. }
  204. private static class FileBlocks {
  205. private final DefaultInputFile inputFile;
  206. private final Collection<Block> blocks;
  207. public FileBlocks(DefaultInputFile inputFile, Collection<Block> blocks) {
  208. this.inputFile = inputFile;
  209. this.blocks = blocks;
  210. }
  211. public DefaultInputFile getInputFile() {
  212. return inputFile;
  213. }
  214. public Collection<Block> getBlocks() {
  215. return blocks;
  216. }
  217. }
  218. }