3 * Copyright (C) 2009-2016 SonarSource SA
4 * mailto:contact AT sonarsource DOT com
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 3 of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 package org.sonar.server.computation.task.projectanalysis.filemove;
22 import java.util.List;
24 import static java.lang.Math.max;
25 import static java.lang.Math.min;
27 // TODO evaluate https://commons.apache.org/sandbox/commons-text/jacoco/org.apache.commons.text.similarity/CosineSimilarity.java.html
28 // TODO another possible algorithm : just take the intersection of line hashes. That would allow to support block moving.
29 public class SourceSimilarityImpl implements SourceSimilarity {
31 // TODO verify algorithm http://stackoverflow.com/questions/6087281/similarity-score-levenshtein
33 public <T extends Object> int score(List<T> left, List<T> right) {
34 int distance = levenshteinDistance(left, right);
35 return (int) (100 * (1.0 - ((double) distance) / (max(left.size(), right.size()))));
38 // TODO verify https://commons.apache.org/sandbox/commons-text/jacoco/org.apache.commons.text.similarity/LevenshteinDistance.java.html
39 <T extends Object> int levenshteinDistance(List<T> left, List<T> right) {
40 int len0 = left.size() + 1;
41 int len1 = right.size() + 1;
43 // the array of distances
44 int[] cost = new int[len0];
45 int[] newcost = new int[len0];
47 // initial cost of skipping prefix in String s0
48 for (int i = 0; i < len0; i++) {
52 // dynamically computing the array of distances
54 // transformation cost for each letter in s1
55 for (int j = 1; j < len1; j++) {
56 // initial cost of skipping prefix in String s1
59 // transformation cost for each letter in s0
60 for (int i = 1; i < len0; i++) {
61 // matching current letters in both strings
62 int match = left.get(i - 1).equals(right.get(j - 1)) ? 0 : 1;
64 // computing cost for each transformation
65 int costReplace = cost[i - 1] + match;
66 int costInsert = cost[i] + 1;
67 int costDelete = newcost[i - 1] + 1;
70 newcost[i] = min(min(costInsert, costDelete), costReplace);
73 // swap cost/newcost arrays
79 // the distance is the cost for transforming all letters in both strings
80 return cost[len0 - 1];