3 * Copyright (C) 2009-2016 SonarSource SA
4 * mailto:contact AT sonarsource DOT com
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 3 of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 package org.sonar.server.computation.task.projectanalysis.filemove;
22 import com.google.common.base.Splitter;
23 import com.google.common.collect.ArrayListMultimap;
24 import com.google.common.collect.ImmutableMap;
25 import com.google.common.collect.ImmutableSet;
26 import com.google.common.collect.Multimap;
27 import com.google.common.collect.Sets;
28 import java.util.ArrayList;
29 import java.util.HashSet;
30 import java.util.Iterator;
31 import java.util.List;
34 import javax.annotation.CheckForNull;
35 import javax.annotation.concurrent.Immutable;
36 import org.sonar.api.resources.Qualifiers;
37 import org.sonar.api.utils.log.Logger;
38 import org.sonar.api.utils.log.Loggers;
39 import org.sonar.core.hash.SourceHashComputer;
40 import org.sonar.core.hash.SourceLinesHashesComputer;
41 import org.sonar.core.util.CloseableIterator;
42 import org.sonar.db.DbClient;
43 import org.sonar.db.DbSession;
44 import org.sonar.db.component.ComponentTreeQuery;
45 import org.sonar.db.source.FileSourceDto;
46 import org.sonar.server.computation.task.projectanalysis.analysis.AnalysisMetadataHolder;
47 import org.sonar.server.computation.task.projectanalysis.component.Component;
48 import org.sonar.server.computation.task.projectanalysis.component.CrawlerDepthLimit;
49 import org.sonar.server.computation.task.projectanalysis.component.DepthTraversalTypeAwareCrawler;
50 import org.sonar.server.computation.task.projectanalysis.component.TreeRootHolder;
51 import org.sonar.server.computation.task.projectanalysis.component.TypeAwareVisitorAdapter;
52 import org.sonar.server.computation.task.projectanalysis.filemove.FileSimilarity.File;
53 import org.sonar.server.computation.task.projectanalysis.analysis.Analysis;
54 import org.sonar.server.computation.task.projectanalysis.source.SourceLinesRepository;
55 import org.sonar.server.computation.task.step.ComputationStep;
57 import static com.google.common.base.Splitter.on;
58 import static com.google.common.collect.FluentIterable.from;
59 import static java.util.Arrays.asList;
60 import static java.util.Collections.singletonList;
61 import static org.sonar.server.computation.task.projectanalysis.component.ComponentVisitor.Order.POST_ORDER;
63 public class FileMoveDetectionStep implements ComputationStep {
64 protected static final int MIN_REQUIRED_SCORE = 85;
65 private static final Logger LOG = Loggers.get(FileMoveDetectionStep.class);
66 private static final List<String> FILE_QUALIFIERS = asList(Qualifiers.FILE, Qualifiers.UNIT_TEST_FILE);
67 private static final List<String> SORT_FIELDS = singletonList("name");
68 private static final Splitter LINES_HASHES_SPLITTER = on('\n');
70 private final AnalysisMetadataHolder analysisMetadataHolder;
71 private final TreeRootHolder rootHolder;
72 private final DbClient dbClient;
73 private final SourceLinesRepository sourceLinesRepository;
74 private final FileSimilarity fileSimilarity;
75 private final MutableMovedFilesRepository movedFilesRepository;
77 public FileMoveDetectionStep(AnalysisMetadataHolder analysisMetadataHolder, TreeRootHolder rootHolder, DbClient dbClient,
78 SourceLinesRepository sourceLinesRepository, FileSimilarity fileSimilarity, MutableMovedFilesRepository movedFilesRepository) {
79 this.analysisMetadataHolder = analysisMetadataHolder;
80 this.rootHolder = rootHolder;
81 this.dbClient = dbClient;
82 this.sourceLinesRepository = sourceLinesRepository;
83 this.fileSimilarity = fileSimilarity;
84 this.movedFilesRepository = movedFilesRepository;
88 public String getDescription() {
89 return "Detect file moves";
93 public void execute() {
94 // do nothing if no files in db (first analysis)
95 Analysis baseProjectAnalysis = analysisMetadataHolder.getBaseProjectSnapshot();
96 if (baseProjectAnalysis == null) {
97 LOG.debug("First analysis. Do nothing.");
101 Map<String, DbComponent> dbFilesByKey = getDbFilesByKey();
102 if (dbFilesByKey.isEmpty()) {
103 LOG.debug("Previous snapshot has no file. Do nothing.");
107 Map<String, Component> reportFilesByKey = getReportFilesByKey(this.rootHolder.getRoot());
108 if (reportFilesByKey.isEmpty()) {
109 LOG.debug("No files in report. Do nothing.");
113 Set<String> addedFileKeys = ImmutableSet.copyOf(Sets.difference(reportFilesByKey.keySet(), dbFilesByKey.keySet()));
114 Set<String> removedFileKeys = ImmutableSet.copyOf(Sets.difference(dbFilesByKey.keySet(), reportFilesByKey.keySet()));
116 // can find matches if at least one of the added or removed files groups is empty => abort
117 if (addedFileKeys.isEmpty() || removedFileKeys.isEmpty()) {
118 LOG.debug("Either no files added or no files removed. Do nothing.");
122 // retrieve file data from report
123 Map<String, File> reportFileSourcesByKey = getReportFileSourcesByKey(reportFilesByKey, addedFileKeys);
125 // compute score matrix
126 ScoreMatrix scoreMatrix = computeScoreMatrix(dbFilesByKey, removedFileKeys, reportFileSourcesByKey);
127 printIfDebug(scoreMatrix);
129 // not a single match with score higher than MIN_REQUIRED_SCORE => abort
130 if (scoreMatrix.getMaxScore() < MIN_REQUIRED_SCORE) {
131 LOG.debug("max score in matrix is less than min required score (%s). Do nothing.", MIN_REQUIRED_SCORE);
135 MatchesByScore matchesByScore = MatchesByScore.create(scoreMatrix);
137 ElectedMatches electedMatches = electMatches(removedFileKeys, reportFileSourcesByKey, matchesByScore);
139 registerMatches(dbFilesByKey, reportFilesByKey, electedMatches);
142 private void registerMatches(Map<String, DbComponent> dbFilesByKey, Map<String, Component> reportFilesByKey, ElectedMatches electedMatches) {
143 for (Match validatedMatch : electedMatches) {
144 movedFilesRepository.setOriginalFile(
145 reportFilesByKey.get(validatedMatch.getReportKey()),
146 toOriginalFile(dbFilesByKey.get(validatedMatch.getDbKey())));
147 LOG.debug("File move found: {}", validatedMatch);
151 private Map<String, DbComponent> getDbFilesByKey() {
152 try (DbSession dbSession = dbClient.openSession(false)) {
153 // FIXME no need to use such a complex query, joining on SNAPSHOTS and retrieving all column of table PROJECTS, replace with dedicated
155 return from(dbClient.componentDao().selectDescendants(
157 ComponentTreeQuery.builder()
158 .setBaseUuid(rootHolder.getRoot().getUuid())
159 .setQualifiers(FILE_QUALIFIERS)
160 .setSortFields(SORT_FIELDS)
161 .setPageSize(Integer.MAX_VALUE)
164 .transform(componentDto -> new DbComponent(componentDto.getId(), componentDto.key(), componentDto.uuid(), componentDto.path()))
165 .uniqueIndex(DbComponent::getKey);
169 private static Map<String, Component> getReportFilesByKey(Component root) {
170 final ImmutableMap.Builder<String, Component> builder = ImmutableMap.builder();
171 new DepthTraversalTypeAwareCrawler(
172 new TypeAwareVisitorAdapter(CrawlerDepthLimit.FILE, POST_ORDER) {
174 public void visitFile(Component file) {
175 builder.put(file.getKey(), file);
178 return builder.build();
181 private Map<String, File> getReportFileSourcesByKey(Map<String, Component> reportFilesByKey, Set<String> addedFileKeys) {
182 ImmutableMap.Builder<String, File> builder = ImmutableMap.builder();
183 for (String fileKey : addedFileKeys) {
184 // FIXME computation of sourceHash and lineHashes might be done multiple times for some files: here, in ComputeFileSourceData, in
185 // SourceHashRepository
186 Component component = reportFilesByKey.get(fileKey);
187 SourceLinesHashesComputer linesHashesComputer = new SourceLinesHashesComputer();
188 SourceHashComputer sourceHashComputer = new SourceHashComputer();
189 try (CloseableIterator<String> lineIterator = sourceLinesRepository.readLines(component)) {
190 while (lineIterator.hasNext()) {
191 String line = lineIterator.next();
192 linesHashesComputer.addLine(line);
193 sourceHashComputer.addLine(line, lineIterator.hasNext());
196 builder.put(fileKey, new File(component.getReportAttributes().getPath(), sourceHashComputer.getHash(), linesHashesComputer.getLineHashes()));
198 return builder.build();
201 private ScoreMatrix computeScoreMatrix(Map<String, DbComponent> dtosByKey, Set<String> dbFileKeys, Map<String, File> reportFileSourcesByKey) {
202 int[][] scoreMatrix = new int[dbFileKeys.size()][reportFileSourcesByKey.size()];
205 try (DbSession dbSession = dbClient.openSession(false)) {
207 for (String removedFileKey : dbFileKeys) {
208 File fileInDb = getFile(dbSession, dtosByKey.get(removedFileKey));
209 if (fileInDb == null) {
213 int reportFileIndex = 0;
214 for (Map.Entry<String, File> reportFileSourceAndKey : reportFileSourcesByKey.entrySet()) {
215 File unmatchedFile = reportFileSourceAndKey.getValue();
216 int score = fileSimilarity.score(fileInDb, unmatchedFile);
217 scoreMatrix[dbFileIndex][reportFileIndex] = score;
218 if (score > maxScore) {
227 return new ScoreMatrix(dbFileKeys, reportFileSourcesByKey, scoreMatrix, maxScore);
231 private File getFile(DbSession dbSession, DbComponent dbComponent) {
232 FileSourceDto fileSourceDto = dbClient.fileSourceDao().selectSourceByFileUuid(dbSession, dbComponent.getUuid());
233 if (fileSourceDto == null) {
236 return new File(dbComponent.getPath(), fileSourceDto.getSrcHash(), LINES_HASHES_SPLITTER.splitToList(fileSourceDto.getLineHashes()));
239 private static void printIfDebug(ScoreMatrix scoreMatrix) {
240 if (LOG.isDebugEnabled()) {
241 LOG.debug("ScoreMatrix:\n" + scoreMatrix.toCsv(';'));
245 private static ElectedMatches electMatches(Set<String> dbFileKeys, Map<String, File> reportFileSourcesByKey, MatchesByScore matchesByScore) {
246 ElectedMatches electedMatches = new ElectedMatches(matchesByScore, dbFileKeys, reportFileSourcesByKey);
247 Multimap<String, Match> matchesPerFileForScore = ArrayListMultimap.create();
248 for (List<Match> matches : matchesByScore) {
249 // no match for this score value, ignore
250 if (matches == null) {
254 List<Match> matchesToValidate = electedMatches.filter(matches);
255 if (matchesToValidate.isEmpty()) {
258 if (matchesToValidate.size() == 1) {
259 Match match = matchesToValidate.get(0);
260 electedMatches.add(match);
262 matchesPerFileForScore.clear();
263 for (Match match : matchesToValidate) {
264 matchesPerFileForScore.put(match.getDbKey(), match);
265 matchesPerFileForScore.put(match.getReportKey(), match);
267 // validate non ambiguous matches (ie. the match is the only match of either the db file and the report file)
268 for (Match match : matchesToValidate) {
269 int dbFileMatchesCount = matchesPerFileForScore.get(match.getDbKey()).size();
270 int reportFileMatchesCount = matchesPerFileForScore.get(match.getReportKey()).size();
271 if (dbFileMatchesCount == 1 && reportFileMatchesCount == 1) {
272 electedMatches.add(match);
277 return electedMatches;
280 private static MovedFilesRepository.OriginalFile toOriginalFile(DbComponent dbComponent) {
281 return new MovedFilesRepository.OriginalFile(dbComponent.getId(), dbComponent.getUuid(), dbComponent.getKey());
285 private static final class DbComponent {
286 private final long id;
287 private final String key;
288 private final String uuid;
289 private final String path;
291 private DbComponent(long id, String key, String uuid, String path) {
298 public long getId() {
302 public String getKey() {
306 public String getUuid() {
310 public String getPath() {
315 private static class ElectedMatches implements Iterable<Match> {
316 private final List<Match> matches;
317 private final Set<String> matchedFileKeys;
319 public ElectedMatches(MatchesByScore matchesByScore, Set<String> dbFileKeys, Map<String, File> reportFileSourcesByKey) {
320 this.matches = new ArrayList<>(matchesByScore.getSize());
321 this.matchedFileKeys = new HashSet<>(dbFileKeys.size() + reportFileSourcesByKey.size());
324 public void add(Match match) {
326 matchedFileKeys.add(match.getDbKey());
327 matchedFileKeys.add(match.getReportKey());
330 public List<Match> filter(Iterable<Match> matches) {
331 return from(matches).filter(this::notAlreadyMatched).toList();
334 private boolean notAlreadyMatched(Match input) {
335 return !(matchedFileKeys.contains(input.getDbKey()) || matchedFileKeys.contains(input.getReportKey()));
339 public Iterator<Match> iterator() {
340 return matches.iterator();