aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-duplications
diff options
context:
space:
mode:
authorEvgeny Mandrikov <mandrikov@gmail.com>2012-01-25 19:07:33 +0400
committerEvgeny Mandrikov <mandrikov@gmail.com>2012-01-26 13:58:50 +0400
commit90a6c00024ca6fc4476c6ecf7ab30e44e397240f (patch)
treeab3476c8b19b4d0bdce94c7119a230c56ef9cebf /sonar-duplications
parentf185818faa11af412a75e33bf844bd3f21929e03 (diff)
downloadsonarqube-90a6c00024ca6fc4476c6ecf7ab30e44e397240f.tar.gz
sonarqube-90a6c00024ca6fc4476c6ecf7ab30e44e397240f.zip
SONAR-3181,SONAR-3139 Allow filtering by number of tokens
To do this each block stores two additional integers - index for first and last token. So total size of in-memory storage has been increased.
Diffstat (limited to 'sonar-duplications')
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/CodeFragment.java46
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java68
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java7
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java79
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java30
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java56
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java33
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/PmdBlockChunker.java80
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java45
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokensLine.java69
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java13
-rw-r--r--sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java4
-rw-r--r--sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBlockChunkerTest.java56
-rw-r--r--sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java102
-rw-r--r--sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java65
15 files changed, 637 insertions, 116 deletions
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/CodeFragment.java b/sonar-duplications/src/main/java/org/sonar/duplications/CodeFragment.java
new file mode 100644
index 00000000000..e908f3a5060
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/CodeFragment.java
@@ -0,0 +1,46 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications;
+
+import com.google.common.annotations.Beta;
+
+/**
+ * TODO Enforce contracts of this interface in concrete classes by using preconditions, currently this leads to failures of tests.
+ *
+ * <p>This interface is not intended to be implemented by clients.</p>
+ *
+ * @since 2.14
+ */
+@Beta
+public interface CodeFragment {
+
+ /**
+ * Number of line where fragment starts.
+ * Numbering starts from 1.
+ */
+ int getStartLine();
+
+ /**
+ * Number of line where fragment ends.
+ * Numbering starts from 1.
+ */
+ int getEndLine();
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java
index 7a1f659b55d..fb98dd10732 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java
@@ -19,11 +19,14 @@
*/
package org.sonar.duplications.block;
+import com.google.common.annotations.Beta;
+import org.sonar.duplications.CodeFragment;
+
/**
* Represents part of source code between two lines.
* If two blocks have the same {@link #getBlockHash() hash}, then we assume that there is a duplication in a code, which they represent.
*/
-public final class Block {
+public final class Block implements CodeFragment {
private final String resourceId;
private final ByteArray blockHash;
@@ -64,14 +67,69 @@ public final class Block {
return indexInFile;
}
+ private int startUnit;
+ private int endUnit;
+
+ /**
+ * @since 2.14
+ */
+ @Beta
+ public int getStartUnit() {
+ return startUnit;
+ }
+
+ /**
+ * @since 2.14
+ */
+ @Beta
+ public int getEndUnit() {
+ return endUnit;
+ }
+
+ /**
+ * TODO get rid of this method, otherwise class is not immutable
+ *
+ * @see #getStartUnit()
+ * @since 2.14
+ */
+ @Beta
+ public void setStartUnit(int startUnit) {
+ this.startUnit = startUnit;
+ }
+
+ /**
+ * TODO get rid of this method, otherwise class is not immutable
+ *
+ * @see #getEndUnit()
+ * @since 2.14
+ */
+ @Beta
+ public void setEndUnit(int endUnit) {
+ this.endUnit = endUnit;
+ }
+
+ /**
+ * @deprecated in 2.14, use {@link #getStartLine()} instead
+ */
public int getFirstLineNumber() {
return firstLineNumber;
}
+ /**
+ * @deprecated in 2.14, use {@link #getEndLine()} instead
+ */
public int getLastLineNumber() {
return lastLineNumber;
}
+ public int getStartLine() {
+ return firstLineNumber;
+ }
+
+ public int getEndLine() {
+ return lastLineNumber;
+ }
+
@Override
public boolean equals(Object obj) {
if (!(obj instanceof Block)) {
@@ -79,10 +137,10 @@ public final class Block {
}
Block other = (Block) obj;
return resourceId.equals(other.resourceId)
- && blockHash.equals(other.blockHash)
- && indexInFile == other.indexInFile
- && firstLineNumber == other.firstLineNumber
- && lastLineNumber == other.lastLineNumber;
+ && blockHash.equals(other.blockHash)
+ && indexInFile == other.indexInFile
+ && firstLineNumber == other.firstLineNumber
+ && lastLineNumber == other.lastLineNumber;
}
@Override
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java
index cbb8db112f9..3abc0742493 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java
@@ -19,12 +19,11 @@
*/
package org.sonar.duplications.block;
-import java.util.Collections;
-import java.util.List;
-
+import com.google.common.collect.Lists;
import org.sonar.duplications.statement.Statement;
-import com.google.common.collect.Lists;
+import java.util.Collections;
+import java.util.List;
/**
* Creates blocks from statements, each block will contain specified number of statements (<code>blockSize</code>) and 64-bits (8-bytes) hash value.
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java
index c72d6063ab2..1b9f5f8b164 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java
@@ -19,16 +19,15 @@
*/
package org.sonar.duplications.detector.suffixtree;
-import java.util.Collections;
-import java.util.List;
-
+import com.google.common.collect.Lists;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.detector.ContainsInComparator;
import org.sonar.duplications.index.CloneGroup;
import org.sonar.duplications.index.ClonePart;
import org.sonar.duplications.utils.SortedListsUtils;
-import com.google.common.collect.Lists;
+import java.util.Collections;
+import java.util.List;
/**
* Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s.
@@ -41,8 +40,8 @@ public class DuplicationsCollector extends Search.Collector {
private final List<CloneGroup> filtered = Lists.newArrayList();
private int length;
- private ClonePart origin;
- private List<ClonePart> parts;
+ private int count;
+ private int[][] blockNumbers;
public DuplicationsCollector(TextSet text) {
this.text = text;
@@ -58,38 +57,22 @@ public class DuplicationsCollector extends Search.Collector {
@Override
public void startOfGroup(int size, int length) {
+ this.blockNumbers = new int[size][2];
this.length = length;
- this.parts = Lists.newArrayListWithCapacity(size);
}
/**
* Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}.
- *
+ *
* @param start number of first block from text for this part
* @param end number of last block from text for this part
* @param len number of blocks in this part
*/
@Override
public void part(int start, int end) {
- Block firstBlock = text.getBlock(start);
- Block lastBlock = text.getBlock(end - 1);
-
- ClonePart part = new ClonePart(
- firstBlock.getResourceId(),
- firstBlock.getIndexInFile(),
- firstBlock.getFirstLineNumber(),
- lastBlock.getLastLineNumber());
-
- // TODO Godin: maybe use FastStringComparator here ?
- if (originResourceId.equals(part.getResourceId())) { // part from origin
- if (origin == null) {
- origin = part;
- } else if (part.getUnitStart() < origin.getUnitStart()) {
- origin = part;
- }
- }
-
- parts.add(part);
+ blockNumbers[count][0] = start;
+ blockNumbers[count][1] = end - 1;
+ count++;
}
/**
@@ -97,12 +80,48 @@ public class DuplicationsCollector extends Search.Collector {
*/
@Override
public void endOfGroup() {
+ int lengthInUnits = 0;
+ ClonePart origin = null;
+ List<ClonePart> parts = Lists.newArrayListWithCapacity(count);
+ for (int[] b : blockNumbers) {
+ Block firstBlock = text.getBlock(b[0]);
+ Block lastBlock = text.getBlock(b[1]);
+ ClonePart part = new ClonePart(
+ firstBlock.getResourceId(),
+ firstBlock.getIndexInFile(),
+ firstBlock.getFirstLineNumber(),
+ lastBlock.getLastLineNumber());
+
+ // TODO Godin: maybe use FastStringComparator here ?
+ if (originResourceId.equals(part.getResourceId())) { // part from origin
+ if (origin == null) {
+ origin = part;
+ // To calculate length important to use the origin, because otherwise block may come from DB without required data
+ lengthInUnits = lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1;
+ } else if (part.getUnitStart() < origin.getUnitStart()) {
+ origin = part;
+ }
+ }
+
+ parts.add(part);
+ }
+
Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
+
CloneGroup group = new CloneGroup(length, origin, parts);
+ group.setLengthInUnits(lengthInUnits);
+
filter(group);
- parts = null;
- origin = null;
+ reset();
+ }
+
+ /**
+ * Prepare for processing of next duplication.
+ */
+ private void reset() {
+ blockNumbers = null;
+ count = 0;
}
/**
@@ -159,7 +178,7 @@ public class DuplicationsCollector extends Search.Collector {
// TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false
// if this can be proved, then second part can be removed
return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength()))
- && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
+ && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
}
}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java
index 8c662be34aa..dfb86bb93d6 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java
@@ -19,10 +19,11 @@
*/
package org.sonar.duplications.index;
-import java.util.List;
-
+import com.google.common.annotations.Beta;
import com.google.common.collect.ImmutableList;
+import java.util.List;
+
/**
* Groups a set of related {@link ClonePart}s.
*/
@@ -47,8 +48,31 @@ public class CloneGroup {
return originPart;
}
+ private int length;
+
+ /**
+ * Length of duplication measured in original units, e.g. for token-based detection - in tokens.
+ *
+ * @since 2.14
+ */
+ @Beta
+ public int getLengthInUnits() {
+ return length;
+ }
+
+ /**
+ * TODO get rid of this method, otherwise class is not immutable
+ *
+ * @since 2.14
+ * @see #getLengthInUnits()
+ */
+ @Beta
+ public void setLengthInUnits(int length) {
+ this.length = length;
+ }
+
/**
- * @return clone length in units (not in lines)
+ * @return clone length in {@link org.sonar.duplications.block.Block}s
*/
public int getCloneUnitLength() {
return cloneLength;
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java
index 274b3708ef1..36648b19d25 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java
@@ -19,23 +19,25 @@
*/
package org.sonar.duplications.index;
-public class ClonePart {
+import org.sonar.duplications.CodeFragment;
+
+public class ClonePart implements CodeFragment {
private final String resourceId;
- private final int unitStart;
- private final int lineStart;
- private final int lineEnd;
+ private final int startUnit;
+ private final int startLine;
+ private final int endLine;
/**
* Cache for hash code.
*/
private int hash;
- public ClonePart(String resourceId, int unitStart, int lineStart, int lineEnd) {
+ public ClonePart(String resourceId, int startUnit, int startLine, int endLine) {
this.resourceId = resourceId;
- this.unitStart = unitStart;
- this.lineStart = lineStart;
- this.lineEnd = lineEnd;
+ this.startUnit = startUnit;
+ this.startLine = startLine;
+ this.endLine = endLine;
}
public String getResourceId() {
@@ -43,19 +45,35 @@ public class ClonePart {
}
public int getUnitStart() {
- return unitStart;
+ return startUnit;
}
+ /**
+ * @deprecated in 2.14, use {@link #getStartLine()} instead
+ */
+ @Deprecated
public int getLineStart() {
- return lineStart;
+ return startLine;
}
+ /**
+ * @deprecated in 2.14, use {@link #getEndLine()} instead
+ */
+ @Deprecated
public int getLineEnd() {
- return lineEnd;
+ return endLine;
+ }
+
+ public int getStartLine() {
+ return startLine;
+ }
+
+ public int getEndLine() {
+ return endLine;
}
public int getLines() {
- return lineEnd - lineStart + 1;
+ return endLine - startLine + 1;
}
@Override
@@ -63,9 +81,9 @@ public class ClonePart {
if (obj instanceof ClonePart) {
ClonePart another = (ClonePart) obj;
return another.resourceId.equals(resourceId)
- && another.lineStart == lineStart
- && another.lineEnd == lineEnd
- && another.unitStart == unitStart;
+ && another.startLine == startLine
+ && another.endLine == endLine
+ && another.startUnit == startUnit;
}
return false;
}
@@ -75,9 +93,9 @@ public class ClonePart {
int h = hash;
if (h == 0) {
h = resourceId.hashCode();
- h = 31 * h + lineStart;
- h = 31 * h + lineEnd;
- h = 31 * h + unitStart;
+ h = 31 * h + startLine;
+ h = 31 * h + endLine;
+ h = 31 * h + startUnit;
hash = h;
}
return h;
@@ -85,7 +103,7 @@ public class ClonePart {
@Override
public String toString() {
- return "'" + resourceId + "':[" + unitStart + "|" + lineStart + "-" + lineEnd + "]";
+ return "'" + resourceId + "':[" + startUnit + "|" + startLine + "-" + endLine + "]";
}
}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java
index 4223c8e95a7..cb721a0cc78 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java
@@ -19,14 +19,13 @@
*/
package org.sonar.duplications.index;
-import java.util.Collection;
-import java.util.List;
-
+import com.google.common.collect.Lists;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.block.ByteArray;
import org.sonar.duplications.utils.FastStringComparator;
-import com.google.common.collect.Lists;
+import java.util.Collection;
+import java.util.List;
/**
* Provides an index optimized by memory.
@@ -46,7 +45,7 @@ public class PackedMemoryCloneIndex extends AbstractCloneIndex {
private static final int DEFAULT_INITIAL_CAPACITY = 1024;
- private static final int BLOCK_INTS = 3;
+ private static final int BLOCK_INTS = 5;
private final int hashInts;
@@ -111,9 +110,14 @@ public class PackedMemoryCloneIndex extends AbstractCloneIndex {
}
int indexInFile = blockData[offset++];
int firstLineNumber = blockData[offset++];
- int lastLineNumber = blockData[offset];
+ int lastLineNumber = blockData[offset++];
+ int startUnit = blockData[offset++];
+ int endUnit = blockData[offset];
- result.add(new Block(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber));
+ Block block = new Block(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber);
+ block.setStartUnit(startUnit);
+ block.setEndUnit(endUnit);
+ result.add(block);
index++;
realIndex = resourceIdsIndex[index];
@@ -146,9 +150,14 @@ public class PackedMemoryCloneIndex extends AbstractCloneIndex {
offset = index * blockInts + hashInts;
int indexInFile = blockData[offset++];
int firstLineNumber = blockData[offset++];
- int lastLineNumber = blockData[offset];
-
- result.add(new Block(resourceId, sequenceHash, indexInFile, firstLineNumber, lastLineNumber));
+ int lastLineNumber = blockData[offset++];
+ int startUnit = blockData[offset++];
+ int endUnit = blockData[offset];
+
+ Block block = new Block(resourceId, sequenceHash, indexInFile, firstLineNumber, lastLineNumber);
+ block.setStartUnit(startUnit);
+ block.setEndUnit(endUnit);
+ result.add(block);
index++;
}
return result;
@@ -176,7 +185,9 @@ public class PackedMemoryCloneIndex extends AbstractCloneIndex {
}
blockData[offset++] = block.getIndexInFile();
blockData[offset++] = block.getFirstLineNumber();
- blockData[offset] = block.getLastLineNumber();
+ blockData[offset++] = block.getLastLineNumber();
+ blockData[offset++] = block.getStartUnit();
+ blockData[offset] = block.getEndUnit();
size++;
}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/PmdBlockChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/PmdBlockChunker.java
new file mode 100644
index 00000000000..58713b204f5
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/PmdBlockChunker.java
@@ -0,0 +1,80 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.internal.pmd;
+
+import com.google.common.collect.Lists;
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.block.BlockChunker;
+import org.sonar.duplications.block.ByteArray;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Differences with {@link BlockChunker}:
+ * works with {@link TokensLine},
+ * sets {@link Block#setStartUnit(int)} and {@link Block#setEndUnit(int)} - indexes of first and last token for this block.
+ */
+public class PmdBlockChunker {
+
+ private static final long PRIME_BASE = 31;
+
+ private final int blockSize;
+ private final long power;
+
+ public PmdBlockChunker(int blockSize) {
+ this.blockSize = blockSize;
+
+ long pow = 1;
+ for (int i = 0; i < blockSize - 1; i++) {
+ pow = pow * PRIME_BASE;
+ }
+ this.power = pow;
+ }
+
+ public List<Block> chunk(String resourceId, List<TokensLine> fragments) {
+ if (fragments.size() < blockSize) {
+ return Collections.emptyList();
+ }
+ TokensLine[] fragmentsArr = fragments.toArray(new TokensLine[fragments.size()]);
+ List<Block> blocks = Lists.newArrayListWithCapacity(fragmentsArr.length - blockSize + 1);
+ long hash = 0;
+ int first = 0;
+ int last = 0;
+ for (; last < blockSize - 1; last++) {
+ hash = hash * PRIME_BASE + fragmentsArr[last].getHashCode();
+ }
+ for (; last < fragmentsArr.length; last++, first++) {
+ TokensLine firstFragment = fragmentsArr[first];
+ TokensLine lastFragment = fragmentsArr[last];
+ // add last statement to hash
+ hash = hash * PRIME_BASE + lastFragment.getHashCode();
+ // create block
+ Block block = new Block(resourceId, new ByteArray(hash), first, firstFragment.getStartLine(), lastFragment.getEndLine());
+ block.setStartUnit(firstFragment.getStartUnit());
+ block.setEndUnit(lastFragment.getEndUnit());
+ blocks.add(block);
+ // remove first statement from hash
+ hash -= power * firstFragment.getHashCode();
+ }
+ return blocks;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
index 777b4d53d48..57e84688d76 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
@@ -25,36 +25,45 @@ import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;
+import org.sonar.duplications.block.Block;
import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
-import org.sonar.duplications.statement.Statement;
import java.io.File;
import java.io.IOException;
import java.util.List;
/**
- * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link Statement}s.
- * Principle of conversion - statement formed from tokens of one line.
+ * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s.
*/
public class TokenizerBridge {
+ private static final int BLOCK_SIZE = 10;
+
private final Tokenizer tokenizer;
private final String encoding;
+ private final PmdBlockChunker blockBuilder;
public TokenizerBridge(Tokenizer tokenizer, String encoding) {
this.tokenizer = tokenizer;
this.encoding = encoding;
- clearCache();
+ this.blockBuilder = new PmdBlockChunker(BLOCK_SIZE);
+ }
+
+ // TODO remove from here
+ public List<Block> chunk(String resourceId, File file) {
+ return blockBuilder.chunk(resourceId, chunk(file));
}
- public List<Statement> tokenize(File file) {
+ public List<TokensLine> chunk(File file) {
SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
Tokens tokens = new Tokens();
+ TokenEntry.clearImages();
try {
tokenizer.tokenize(sourceCode, tokens);
} catch (IOException e) {
throw Throwables.propagate(e);
}
+ TokenEntry.clearImages();
return convert(tokens.getTokens());
}
@@ -62,34 +71,34 @@ public class TokenizerBridge {
* We expect that implementation of {@link Tokenizer} is correct:
* tokens ordered by occurrence in source code and last token is EOF.
*/
- private static List<Statement> convert(List<TokenEntry> tokens) {
- ImmutableList.Builder<Statement> result = ImmutableList.builder();
- int currentLine = Integer.MIN_VALUE;
+ private static List<TokensLine> convert(List<TokenEntry> tokens) {
+ ImmutableList.Builder<TokensLine> result = ImmutableList.builder();
StringBuilder sb = new StringBuilder();
+ int startLine = Integer.MIN_VALUE;
+ int startIndex = 0;
+ int currentIndex = 0;
for (TokenEntry token : tokens) {
if (token != TokenEntry.EOF) {
String value = token.getValue();
int line = token.getBeginLine();
- if (line != currentLine) {
- addNewStatement(result, currentLine, sb);
- currentLine = line;
+ if (line != startLine) {
+ addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
+ startIndex = currentIndex + 1;
+ startLine = line;
}
+ currentIndex++;
sb.append(value);
}
}
- addNewStatement(result, currentLine, sb);
+ addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
return result.build();
}
- private static void addNewStatement(ImmutableList.Builder<Statement> result, int line, StringBuilder sb) {
+ private static void addNewTokensLine(ImmutableList.Builder<TokensLine> result, int startUnit, int endUnit, int startLine, StringBuilder sb) {
if (sb.length() != 0) {
- result.add(new Statement(line, line, sb.toString()));
+ result.add(new TokensLine(startUnit, endUnit, startLine, sb.toString().hashCode()));
sb.setLength(0);
}
}
- public void clearCache() {
- TokenEntry.clearImages();
- }
-
}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokensLine.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokensLine.java
new file mode 100644
index 00000000000..2f832bb50af
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokensLine.java
@@ -0,0 +1,69 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.internal.pmd;
+
+import com.google.common.base.Preconditions;
+import org.sonar.duplications.CodeFragment;
+
+/**
+ * Immutable code fragment, which formed from tokens of one line.
+ */
+class TokensLine implements CodeFragment {
+
+ private final int startLine;
+ private final int hashCode;
+
+ private final int startUnit;
+ private final int endUnit;
+
+ public TokensLine(int startUnit, int endUnit, int startLine, int hashCode) {
+ Preconditions.checkArgument(startLine > 0);
+ // TODO do we have requirements for length and hashcode ?
+ this.startLine = startLine;
+ this.hashCode = hashCode;
+
+ this.startUnit = startUnit;
+ this.endUnit = endUnit;
+ }
+
+ public int getStartLine() {
+ return startLine;
+ }
+
+ /**
+ * Same as {@link #getStartLine()}
+ */
+ public int getEndLine() {
+ return startLine;
+ }
+
+ public int getHashCode() {
+ return hashCode;
+ }
+
+ public int getStartUnit() {
+ return startUnit;
+ }
+
+ public int getEndUnit() {
+ return endUnit;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java
index a154792dfb5..7f8a9c473b5 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java
@@ -19,11 +19,12 @@
*/
package org.sonar.duplications.statement;
-import java.util.List;
-
+import org.sonar.duplications.CodeFragment;
import org.sonar.duplications.token.Token;
-public class Statement {
+import java.util.List;
+
+public class Statement implements CodeFragment {
private final int startLine;
private final int endLine;
@@ -84,8 +85,8 @@ public class Statement {
}
Statement other = (Statement) obj;
return startLine == other.startLine
- && endLine == other.endLine
- && value.equals(other.value);
+ && endLine == other.endLine
+ && value.equals(other.value);
}
@Override
@@ -93,4 +94,4 @@ public class Statement {
return "[" + getStartLine() + "-" + getEndLine() + "] [" + getValue() + "]";
}
-} \ No newline at end of file
+}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java
index d825084dacf..c7133c4387a 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java
@@ -57,8 +57,9 @@ public class CPDTest {
assertThat(match.getLineCount(), is(26));
assertThat(match.getFirstMark().getBeginLine(), is(16));
assertThat(match.getSourceCodeSlice(), is(nullValue()));
+ assertThat(match.getTokenCount(), is(116));
}
-
+
@Test
public void testDuplicationOnSameFile() throws IOException {
TokenEntry.clearImages();
@@ -77,6 +78,7 @@ public class CPDTest {
assertThat(match.getLineCount(), is(16));
assertThat(match.getFirstMark().getBeginLine(), is(29));
assertThat(match.getSourceCodeSlice(), is(nullValue()));
+ assertThat(match.getTokenCount(), is(160));
}
private List<Match> getMatches(CPD cpd) {
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBlockChunkerTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBlockChunkerTest.java
new file mode 100644
index 00000000000..50b17be524c
--- /dev/null
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBlockChunkerTest.java
@@ -0,0 +1,56 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.internal.pmd;
+
+import org.junit.Test;
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.block.ByteArray;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+public class PmdBlockChunkerTest {
+
+ @Test
+ public void shouldBuildBlocks() {
+ TokensLine line1 = new TokensLine(0, 9, 1, 1);
+ TokensLine line2 = new TokensLine(10, 19, 2, 2);
+ TokensLine line3 = new TokensLine(20, 29, 3, 3);
+
+ List<Block> blocks = new PmdBlockChunker(2).chunk("resourceId", Arrays.asList(line1, line2, line3));
+ assertThat(blocks.size(), is(2));
+
+ Block block = blocks.get(0);
+ // assertThat(block.getLengthInUnits(), is(11));
+ assertThat(block.getStartLine(), is(1));
+ assertThat(block.getEndLine(), is(2));
+ assertThat(block.getBlockHash(), is(new ByteArray(1L * 31 + 2)));
+
+ block = blocks.get(1);
+ // assertThat(block.getLengthInUnits(), is(33));
+ assertThat(block.getStartLine(), is(2));
+ assertThat(block.getEndLine(), is(3));
+ assertThat(block.getBlockHash(), is(new ByteArray(2L * 31 + 3)));
+ }
+
+}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java
new file mode 100644
index 00000000000..60c22a51f85
--- /dev/null
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java
@@ -0,0 +1,102 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.internal.pmd;
+
+import net.sourceforge.pmd.cpd.JavaTokenizer;
+import org.junit.Before;
+import org.junit.Test;
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.detector.suffixtree.SuffixTreeCloneDetectionAlgorithm;
+import org.sonar.duplications.index.CloneGroup;
+import org.sonar.duplications.index.CloneIndex;
+import org.sonar.duplications.index.ClonePart;
+import org.sonar.duplications.index.PackedMemoryCloneIndex;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.List;
+
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+public class PmdBridgeTest {
+
+ private CloneIndex index;
+ private TokenizerBridge bridge;
+
+ @Before
+ public void setUp() {
+ index = new PackedMemoryCloneIndex();
+ bridge = new TokenizerBridge(new JavaTokenizer(), "UTF-8");
+ }
+
+ @Test
+ public void testDuplicationInSingleFile() {
+ File file = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile3.java");
+ addToIndex(file);
+
+ List<CloneGroup> duplications = detect(file);
+ assertThat(duplications.size(), is(1));
+
+ CloneGroup duplication = duplications.get(0);
+ assertThat(duplication.getOriginPart().getResourceId(), is(file.getAbsolutePath()));
+ assertThat(duplication.getCloneParts().size(), is(2));
+ assertThat("length in tokens", duplication.getLengthInUnits(), is(157));
+
+ ClonePart part = duplication.getCloneParts().get(0);
+ assertThat(part.getResourceId(), is(file.getAbsolutePath()));
+ assertThat(part.getStartLine(), is(30));
+ assertThat(part.getEndLine(), is(44));
+ }
+
+ @Test
+ public void testDuplicationBetweenTwoFiles() {
+ File file1 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile1.java");
+ File file2 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile2.java");
+ addToIndex(file1);
+ addToIndex(file2);
+
+ List<CloneGroup> duplications = detect(file1);
+ assertThat(duplications.size(), is(1));
+
+ CloneGroup duplication = duplications.get(0);
+ assertThat(duplication.getOriginPart().getResourceId(), is(file1.getAbsolutePath()));
+ assertThat(duplication.getCloneParts().size(), is(2));
+ assertThat("length in tokens", duplication.getLengthInUnits(), is(115));
+
+ ClonePart part = duplication.getCloneParts().get(0);
+ assertThat(part.getResourceId(), is(file1.getAbsolutePath()));
+ assertThat(part.getStartLine(), is(18));
+ assertThat(part.getEndLine(), is(41));
+ }
+
+ private List<CloneGroup> detect(File file) {
+ Collection<Block> fileBlocks = index.getByResourceId(file.getAbsolutePath());
+ return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks);
+ }
+
+ private void addToIndex(File file) {
+ List<Block> blocks = bridge.chunk(file.getAbsolutePath(), file);
+ for (Block block : blocks) {
+ index.insert(block);
+ }
+ }
+
+}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
index 0d56c5f2da3..ec2bd5d0d9b 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
@@ -23,8 +23,8 @@ import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;
+import org.junit.Before;
import org.junit.Test;
-import org.sonar.duplications.statement.Statement;
import java.io.IOException;
import java.util.List;
@@ -34,8 +34,10 @@ import static org.junit.Assert.assertThat;
public class TokenizerBridgeTest {
- @Test
- public void test() {
+ private TokenizerBridge bridge;
+
+ @Before
+ public void setUp() {
Tokenizer tokenizer = new Tokenizer() {
public void tokenize(SourceCode tokens, Tokens tokenEntries) throws IOException {
tokenEntries.add(new TokenEntry("t1", "src", 1));
@@ -43,30 +45,55 @@ public class TokenizerBridgeTest {
tokenEntries.add(new TokenEntry("t3", "src", 2));
tokenEntries.add(new TokenEntry("t1", "src", 4));
tokenEntries.add(new TokenEntry("t3", "src", 4));
+ tokenEntries.add(new TokenEntry("t3", "src", 4));
tokenEntries.add(TokenEntry.getEOF());
}
};
+ bridge = new TokenizerBridge(tokenizer, "UTF-8");
+ }
+
+ @Test
+ public void shouldClearCacheInTokenEntry() {
+ bridge.chunk(null);
+ TokenEntry token = new TokenEntry("image", "srcId", 0);
+ assertThat(token.getIndex(), is(0));
+ assertThat(token.getIdentifier(), is(1));
+ }
+
+ @Test
+ public void test() {
+ // To be sure that token index will be relative to file - run twice:
+ bridge.chunk(null);
+ List<TokensLine> lines = bridge.chunk(null);
+
+ assertThat(lines.size(), is(3));
+
+ TokensLine line = lines.get(0);
+ // 2 tokens on 1 line
+ assertThat(line.getStartUnit(), is(1));
+ assertThat(line.getEndUnit(), is(2));
- TokenizerBridge bridge = new TokenizerBridge(tokenizer, "UTF-8");
- List<Statement> statements = bridge.tokenize(null);
- bridge.clearCache();
+ assertThat(line.getStartLine(), is(1));
+ assertThat(line.getEndLine(), is(1));
+ assertThat(line.getHashCode(), is("t1t2".hashCode()));
- assertThat(statements.size(), is(3));
+ line = lines.get(1);
+ // 1 token on 2 line
+ assertThat(line.getStartUnit(), is(3));
+ assertThat(line.getEndUnit(), is(3));
- Statement statement = statements.get(0);
- assertThat(statement.getStartLine(), is(1));
- assertThat(statement.getEndLine(), is(1));
- assertThat(statement.getValue(), is("t1t2"));
+ assertThat(line.getStartLine(), is(2));
+ assertThat(line.getEndLine(), is(2));
+ assertThat(line.getHashCode(), is("t3".hashCode()));
- statement = statements.get(1);
- assertThat(statement.getStartLine(), is(2));
- assertThat(statement.getEndLine(), is(2));
- assertThat(statement.getValue(), is("t3"));
+ line = lines.get(2);
+ // 3 tokens on 4 line
+ assertThat(line.getStartUnit(), is(4));
+ assertThat(line.getEndUnit(), is(6));
- statement = statements.get(2);
- assertThat(statement.getStartLine(), is(4));
- assertThat(statement.getEndLine(), is(4));
- assertThat(statement.getValue(), is("t1t3"));
+ assertThat(line.getStartLine(), is(4));
+ assertThat(line.getEndLine(), is(4));
+ assertThat(line.getHashCode(), is("t1t3t3".hashCode()));
}
}