2 * Sonar, open source software quality management tool.
3 * Copyright (C) 2008-2012 SonarSource
4 * mailto:contact AT sonarsource DOT com
6 * Sonar is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 3 of the License, or (at your option) any later version.
11 * Sonar is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Sonar; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
20 package org.sonar.duplications.detector.suffixtree;
22 import com.google.common.collect.Lists;
23 import org.sonar.duplications.block.Block;
24 import org.sonar.duplications.detector.ContainsInComparator;
25 import org.sonar.duplications.index.CloneGroup;
26 import org.sonar.duplications.index.ClonePart;
27 import org.sonar.duplications.utils.SortedListsUtils;
29 import java.util.Collections;
30 import java.util.List;
33 * Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s.
35 public class DuplicationsCollector extends Search.Collector {
37 private final TextSet text;
38 private final String originResourceId;
40 private final List<CloneGroup> filtered = Lists.newArrayList();
44 private int[][] blockNumbers;
46 public DuplicationsCollector(TextSet text) {
48 this.originResourceId = text.getBlock(0).getResourceId();
52 * @return current result
54 public List<CloneGroup> getResult() {
59 public void startOfGroup(int size, int length) {
60 this.blockNumbers = new int[size][2];
65 * Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}.
67 * @param start number of first block from text for this part
68 * @param end number of last block from text for this part
69 * @param len number of blocks in this part
72 public void part(int start, int end) {
73 blockNumbers[count][0] = start;
74 blockNumbers[count][1] = end - 1;
79 * Constructs CloneGroup and saves it.
82 public void endOfGroup() {
83 int lengthInUnits = 0;
84 ClonePart origin = null;
85 List<ClonePart> parts = Lists.newArrayListWithCapacity(count);
86 for (int[] b : blockNumbers) {
87 Block firstBlock = text.getBlock(b[0]);
88 Block lastBlock = text.getBlock(b[1]);
89 ClonePart part = new ClonePart(
90 firstBlock.getResourceId(),
91 firstBlock.getIndexInFile(),
92 firstBlock.getStartLine(),
93 lastBlock.getEndLine());
95 // TODO Godin: maybe use FastStringComparator here ?
96 if (originResourceId.equals(part.getResourceId())) { // part from origin
99 // To calculate length important to use the origin, because otherwise block may come from DB without required data
100 lengthInUnits = lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1;
101 } else if (part.getUnitStart() < origin.getUnitStart()) {
109 Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
111 CloneGroup group = new CloneGroup(length, origin, parts);
112 group.setLengthInUnits(lengthInUnits);
120 * Prepare for processing of next duplication.
122 private void reset() {
128 * Saves CloneGroup, if it is not included into previously saved.
130 * Current CloneGroup can not include none of CloneGroup, which were constructed before.
132 * According to an order of visiting nodes in suffix tree - length of earlier >= length of current.
133 * If length of earlier > length of current, then earlier not contained in current.
134 * If length of earlier = length of current, then earlier can be contained in current only
135 * when current has exactly the same and maybe some additional CloneParts as earlier,
136 * what in his turn will mean that two inner-nodes on same depth will satisfy condition
137 * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth.
139 * Thus this method checks only that none of CloneGroup, which was constructed before, does not include current CloneGroup.
141 private void filter(CloneGroup current) {
142 for (CloneGroup earlier : filtered) {
143 if (containsIn(current, earlier)) {
147 filtered.add(current);
151 * Checks that second CloneGroup includes first one.
153 * CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B,
154 * which satisfy the conditions:
156 * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
158 * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A,
159 * which satisfy the condition:
161 * pB.resourceId == pA.resourceId
163 * Inclusion is the partial order, thus this relation is:
165 * <li>reflexive - A in A</li>
166 * <li>transitive - (A in B) and (B in C) => (A in C)</li>
167 * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li>
171 * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}),
172 * so running time - O(|A|+|B|).
175 private static boolean containsIn(CloneGroup first, CloneGroup second) {
176 List<ClonePart> firstParts = first.getCloneParts();
177 List<ClonePart> secondParts = second.getCloneParts();
178 // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false
179 // if this can be proved, then second part can be removed
180 return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength()))
181 && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);