3 * Copyright (C) 2009-2017 SonarSource SA
4 * mailto:info AT sonarsource DOT com
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 3 of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 package org.sonar.server.computation.task.projectanalysis.filemove;
22 import com.google.common.base.Splitter;
23 import com.google.common.collect.ArrayListMultimap;
24 import com.google.common.collect.ImmutableMap;
25 import com.google.common.collect.ImmutableSet;
26 import com.google.common.collect.Multimap;
27 import com.google.common.collect.Sets;
28 import java.util.ArrayList;
29 import java.util.HashSet;
30 import java.util.Iterator;
31 import java.util.List;
34 import javax.annotation.CheckForNull;
35 import javax.annotation.Nullable;
36 import javax.annotation.concurrent.Immutable;
37 import org.sonar.api.resources.Qualifiers;
38 import org.sonar.api.utils.log.Logger;
39 import org.sonar.api.utils.log.Loggers;
40 import org.sonar.core.hash.SourceLinesHashesComputer;
41 import org.sonar.core.util.CloseableIterator;
42 import org.sonar.db.DbClient;
43 import org.sonar.db.DbSession;
44 import org.sonar.db.component.ComponentDto;
45 import org.sonar.db.component.ComponentTreeQuery;
46 import org.sonar.db.component.ComponentTreeQuery.Strategy;
47 import org.sonar.db.source.FileSourceDto;
48 import org.sonar.server.computation.task.projectanalysis.analysis.Analysis;
49 import org.sonar.server.computation.task.projectanalysis.analysis.AnalysisMetadataHolder;
50 import org.sonar.server.computation.task.projectanalysis.component.Component;
51 import org.sonar.server.computation.task.projectanalysis.component.CrawlerDepthLimit;
52 import org.sonar.server.computation.task.projectanalysis.component.DepthTraversalTypeAwareCrawler;
53 import org.sonar.server.computation.task.projectanalysis.component.TreeRootHolder;
54 import org.sonar.server.computation.task.projectanalysis.component.TypeAwareVisitorAdapter;
55 import org.sonar.server.computation.task.projectanalysis.filemove.FileSimilarity.File;
56 import org.sonar.server.computation.task.projectanalysis.source.SourceLinesRepository;
57 import org.sonar.server.computation.task.step.ComputationStep;
59 import static com.google.common.base.MoreObjects.firstNonNull;
60 import static com.google.common.base.Splitter.on;
61 import static com.google.common.collect.FluentIterable.from;
62 import static java.util.Arrays.asList;
63 import static org.sonar.server.computation.task.projectanalysis.component.ComponentVisitor.Order.POST_ORDER;
65 public class FileMoveDetectionStep implements ComputationStep {
66 protected static final int MIN_REQUIRED_SCORE = 85;
67 private static final Logger LOG = Loggers.get(FileMoveDetectionStep.class);
68 private static final List<String> FILE_QUALIFIERS = asList(Qualifiers.FILE, Qualifiers.UNIT_TEST_FILE);
69 private static final Splitter LINES_HASHES_SPLITTER = on('\n');
71 private final AnalysisMetadataHolder analysisMetadataHolder;
72 private final TreeRootHolder rootHolder;
73 private final DbClient dbClient;
74 private final SourceLinesRepository sourceLinesRepository;
75 private final FileSimilarity fileSimilarity;
76 private final MutableMovedFilesRepository movedFilesRepository;
78 public FileMoveDetectionStep(AnalysisMetadataHolder analysisMetadataHolder, TreeRootHolder rootHolder, DbClient dbClient,
79 SourceLinesRepository sourceLinesRepository, FileSimilarity fileSimilarity, MutableMovedFilesRepository movedFilesRepository) {
80 this.analysisMetadataHolder = analysisMetadataHolder;
81 this.rootHolder = rootHolder;
82 this.dbClient = dbClient;
83 this.sourceLinesRepository = sourceLinesRepository;
84 this.fileSimilarity = fileSimilarity;
85 this.movedFilesRepository = movedFilesRepository;
89 public String getDescription() {
90 return "Detect file moves";
94 public void execute() {
95 // do nothing if no files in db (first analysis)
96 Analysis baseProjectAnalysis = analysisMetadataHolder.getBaseAnalysis();
97 if (baseProjectAnalysis == null) {
98 LOG.debug("First analysis. Do nothing.");
102 Map<String, DbComponent> dbFilesByKey = getDbFilesByKey();
103 if (dbFilesByKey.isEmpty()) {
104 LOG.debug("Previous snapshot has no file. Do nothing.");
108 Map<String, Component> reportFilesByKey = getReportFilesByKey(this.rootHolder.getRoot());
109 if (reportFilesByKey.isEmpty()) {
110 LOG.debug("No files in report. Do nothing.");
114 Set<String> addedFileKeys = ImmutableSet.copyOf(Sets.difference(reportFilesByKey.keySet(), dbFilesByKey.keySet()));
115 Set<String> removedFileKeys = ImmutableSet.copyOf(Sets.difference(dbFilesByKey.keySet(), reportFilesByKey.keySet()));
117 // can find matches if at least one of the added or removed files groups is empty => abort
118 if (addedFileKeys.isEmpty() || removedFileKeys.isEmpty()) {
119 LOG.debug("Either no files added or no files removed. Do nothing.");
123 // retrieve file data from report
124 Map<String, File> reportFileSourcesByKey = getReportFileSourcesByKey(reportFilesByKey, addedFileKeys);
126 // compute score matrix
127 ScoreMatrix scoreMatrix = computeScoreMatrix(dbFilesByKey, removedFileKeys, reportFileSourcesByKey);
128 printIfDebug(scoreMatrix);
130 // not a single match with score higher than MIN_REQUIRED_SCORE => abort
131 if (scoreMatrix.getMaxScore() < MIN_REQUIRED_SCORE) {
132 LOG.debug("max score in matrix is less than min required score (%s). Do nothing.", MIN_REQUIRED_SCORE);
136 MatchesByScore matchesByScore = MatchesByScore.create(scoreMatrix);
138 ElectedMatches electedMatches = electMatches(removedFileKeys, reportFileSourcesByKey, matchesByScore);
140 registerMatches(dbFilesByKey, reportFilesByKey, electedMatches);
143 private void registerMatches(Map<String, DbComponent> dbFilesByKey, Map<String, Component> reportFilesByKey, ElectedMatches electedMatches) {
144 for (Match validatedMatch : electedMatches) {
145 movedFilesRepository.setOriginalFile(
146 reportFilesByKey.get(validatedMatch.getReportKey()),
147 toOriginalFile(dbFilesByKey.get(validatedMatch.getDbKey())));
148 LOG.debug("File move found: {}", validatedMatch);
152 private Map<String, DbComponent> getDbFilesByKey() {
153 try (DbSession dbSession = dbClient.openSession(false)) {
154 // FIXME no need to use such a complex query, joining on SNAPSHOTS and retrieving all column of table PROJECTS, replace with dedicated
156 List<ComponentDto> componentDtos = dbClient.componentDao().selectDescendants(
158 ComponentTreeQuery.builder()
159 .setBaseUuid(rootHolder.getRoot().getUuid())
160 .setQualifiers(FILE_QUALIFIERS)
161 .setStrategy(Strategy.LEAVES)
163 return from(componentDtos)
164 .transform(componentDto -> new DbComponent(componentDto.getId(), componentDto.getDbKey(), componentDto.uuid(), componentDto.path()))
165 .uniqueIndex(DbComponent::getKey);
169 private static Map<String, Component> getReportFilesByKey(Component root) {
170 final ImmutableMap.Builder<String, Component> builder = ImmutableMap.builder();
171 new DepthTraversalTypeAwareCrawler(
172 new TypeAwareVisitorAdapter(CrawlerDepthLimit.FILE, POST_ORDER) {
174 public void visitFile(Component file) {
175 builder.put(file.getKey(), file);
178 return builder.build();
181 private Map<String, File> getReportFileSourcesByKey(Map<String, Component> reportFilesByKey, Set<String> addedFileKeys) {
182 ImmutableMap.Builder<String, File> builder = ImmutableMap.builder();
183 for (String fileKey : addedFileKeys) {
184 // FIXME computation of sourceHash and lineHashes might be done multiple times for some files: here, in ComputeFileSourceData, in
185 // SourceHashRepository
186 Component component = reportFilesByKey.get(fileKey);
187 SourceLinesHashesComputer linesHashesComputer = new SourceLinesHashesComputer();
188 try (CloseableIterator<String> lineIterator = sourceLinesRepository.readLines(component)) {
189 while (lineIterator.hasNext()) {
190 String line = lineIterator.next();
191 linesHashesComputer.addLine(line);
194 builder.put(fileKey, new File(component.getReportAttributes().getPath(), linesHashesComputer.getLineHashes()));
196 return builder.build();
199 private ScoreMatrix computeScoreMatrix(Map<String, DbComponent> dtosByKey, Set<String> dbFileKeys, Map<String, File> reportFileSourcesByKey) {
200 int[][] scoreMatrix = new int[dbFileKeys.size()][reportFileSourcesByKey.size()];
203 try (DbSession dbSession = dbClient.openSession(false)) {
205 for (String removedFileKey : dbFileKeys) {
206 File fileInDb = getFile(dbSession, dtosByKey.get(removedFileKey));
207 if (fileInDb == null) {
211 int reportFileIndex = 0;
212 for (Map.Entry<String, File> reportFileSourceAndKey : reportFileSourcesByKey.entrySet()) {
213 File unmatchedFile = reportFileSourceAndKey.getValue();
214 int score = fileSimilarity.score(fileInDb, unmatchedFile);
215 scoreMatrix[dbFileIndex][reportFileIndex] = score;
216 if (score > maxScore) {
225 return new ScoreMatrix(dbFileKeys, reportFileSourcesByKey, scoreMatrix, maxScore);
229 private File getFile(DbSession dbSession, DbComponent dbComponent) {
230 if (dbComponent.getPath() == null) {
233 FileSourceDto fileSourceDto = dbClient.fileSourceDao().selectSourceByFileUuid(dbSession, dbComponent.getUuid());
234 if (fileSourceDto == null) {
237 String lineHashes = firstNonNull(fileSourceDto.getLineHashes(), "");
238 return new File(dbComponent.getPath(), LINES_HASHES_SPLITTER.splitToList(lineHashes));
241 private static void printIfDebug(ScoreMatrix scoreMatrix) {
242 if (LOG.isDebugEnabled()) {
243 LOG.debug("ScoreMatrix:\n" + scoreMatrix.toCsv(';'));
247 private static ElectedMatches electMatches(Set<String> dbFileKeys, Map<String, File> reportFileSourcesByKey, MatchesByScore matchesByScore) {
248 ElectedMatches electedMatches = new ElectedMatches(matchesByScore, dbFileKeys, reportFileSourcesByKey);
249 Multimap<String, Match> matchesPerFileForScore = ArrayListMultimap.create();
250 matchesByScore.forEach(matches -> electMatches(matches, electedMatches, matchesPerFileForScore));
251 return electedMatches;
254 private static void electMatches(@Nullable List<Match> matches, ElectedMatches electedMatches, Multimap<String, Match> matchesPerFileForScore) {
255 // no match for this score value, ignore
256 if (matches == null) {
260 List<Match> matchesToValidate = electedMatches.filter(matches);
261 if (matchesToValidate.isEmpty()) {
264 if (matchesToValidate.size() == 1) {
265 Match match = matchesToValidate.get(0);
266 electedMatches.add(match);
268 matchesPerFileForScore.clear();
269 for (Match match : matchesToValidate) {
270 matchesPerFileForScore.put(match.getDbKey(), match);
271 matchesPerFileForScore.put(match.getReportKey(), match);
273 // validate non ambiguous matches (ie. the match is the only match of either the db file and the report file)
274 for (Match match : matchesToValidate) {
275 int dbFileMatchesCount = matchesPerFileForScore.get(match.getDbKey()).size();
276 int reportFileMatchesCount = matchesPerFileForScore.get(match.getReportKey()).size();
277 if (dbFileMatchesCount == 1 && reportFileMatchesCount == 1) {
278 electedMatches.add(match);
284 private static MovedFilesRepository.OriginalFile toOriginalFile(DbComponent dbComponent) {
285 return new MovedFilesRepository.OriginalFile(dbComponent.getId(), dbComponent.getUuid(), dbComponent.getKey());
289 private static final class DbComponent {
290 private final long id;
291 private final String key;
292 private final String uuid;
293 private final String path;
295 private DbComponent(long id, String key, String uuid, String path) {
302 public long getId() {
306 public String getKey() {
310 public String getUuid() {
314 public String getPath() {
319 private static class ElectedMatches implements Iterable<Match> {
320 private final List<Match> matches;
321 private final Set<String> matchedFileKeys;
323 public ElectedMatches(MatchesByScore matchesByScore, Set<String> dbFileKeys, Map<String, File> reportFileSourcesByKey) {
324 this.matches = new ArrayList<>(matchesByScore.getSize());
325 this.matchedFileKeys = new HashSet<>(dbFileKeys.size() + reportFileSourcesByKey.size());
328 public void add(Match match) {
330 matchedFileKeys.add(match.getDbKey());
331 matchedFileKeys.add(match.getReportKey());
334 public List<Match> filter(Iterable<Match> matches) {
335 return from(matches).filter(this::notAlreadyMatched).toList();
338 private boolean notAlreadyMatched(Match input) {
339 return !(matchedFileKeys.contains(input.getDbKey()) || matchedFileKeys.contains(input.getReportKey()));
343 public Iterator<Match> iterator() {
344 return matches.iterator();