aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-duplications/src/main
diff options
context:
space:
mode:
Diffstat (limited to 'sonar-duplications/src/main')
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/DuplicationsException.java31
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java107
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java87
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/block/ByteArray.java131
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/original/BlocksGroup.java212
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java154
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/detector/original/OriginalCloneDetectionAlgorithm.java226
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/AbstractCloneIndex.java24
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java107
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/CloneIndex.java48
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java91
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/DataUtils.java116
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex.java84
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex2.java65
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java280
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/java/JavaStatementBuilder.java50
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java74
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java96
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannel.java67
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannelDisptacher.java54
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChunker.java95
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/TokenMatcherFactory.java65
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/AnyTokenMatcher.java41
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/BridgeTokenMatcher.java64
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ExactTokenMatcher.java50
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ForgetLastTokenMatcher.java41
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/OptTokenMatcher.java50
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/TokenMatcher.java36
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/UptoTokenMatcher.java61
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/statement/package-info.java27
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/token/BlackHoleTokenChannel.java35
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/token/Token.java77
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChannel.java59
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java141
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/token/TokenQueue.java85
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/token/package-info.java27
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/utils/FastStringComparator.java57
-rw-r--r--sonar-duplications/src/main/java/org/sonar/duplications/utils/SortedListsUtils.java62
38 files changed, 3177 insertions, 0 deletions
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/DuplicationsException.java b/sonar-duplications/src/main/java/org/sonar/duplications/DuplicationsException.java
new file mode 100644
index 00000000000..27e7dcdc223
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/DuplicationsException.java
@@ -0,0 +1,31 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications;
+
+public class DuplicationsException extends RuntimeException {
+
+ public DuplicationsException(String message) {
+ super(message);
+ }
+
+ public DuplicationsException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java
new file mode 100644
index 00000000000..aac5a806ee6
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java
@@ -0,0 +1,107 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.block;
+
+/**
+ * Represents part of source code between two lines.
+ * If two blocks have the same {@link #getBlockHash() hash}, then we assume that there is a duplication in a code, which they represent.
+ */
+public final class Block {
+
+ private final String resourceId;
+ private final ByteArray blockHash;
+ private final int indexInFile;
+ private final int firstLineNumber;
+ private final int lastLineNumber;
+
+ /**
+ * Cache for hash code.
+ */
+ private int hash;
+
+ public Block(String resourceId, ByteArray blockHash, int indexInFile, int firstLineNumber, int lastLineNumber) {
+ this.resourceId = resourceId;
+ this.blockHash = blockHash;
+ this.indexInFile = indexInFile;
+ this.firstLineNumber = firstLineNumber;
+ this.lastLineNumber = lastLineNumber;
+ }
+
+ public Block(int indexInFile, int firstLineNumber, int lastLineNumber, String resourceId, String hash) {
+ this(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber);
+ }
+
+ public String getHashHex() {
+ return getBlockHash().toString();
+ }
+
+ public String getResourceId() {
+ return resourceId;
+ }
+
+ public ByteArray getBlockHash() {
+ return blockHash;
+ }
+
+ public int getIndexInFile() {
+ return indexInFile;
+ }
+
+ public int getFirstLineNumber() {
+ return firstLineNumber;
+ }
+
+ public int getLastLineNumber() {
+ return lastLineNumber;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof Block)) {
+ return false;
+ }
+ Block other = (Block) obj;
+ return resourceId.equals(other.resourceId)
+ && blockHash.equals(other.blockHash)
+ && indexInFile == other.indexInFile
+ && firstLineNumber == other.firstLineNumber
+ && lastLineNumber == other.lastLineNumber;
+ }
+
+ @Override
+ public int hashCode() {
+ int h = hash;
+ if (h == 0) {
+ h = resourceId.hashCode();
+ h = 31 * h + blockHash.hashCode();
+ h = 31 * h + indexInFile;
+ h = 31 * h + firstLineNumber;
+ h = 31 * h + lastLineNumber;
+ hash = h;
+ }
+ return h;
+ }
+
+ @Override
+ public String toString() {
+ return "'" + resourceId + "'[" + indexInFile + "|" + firstLineNumber + "-" + lastLineNumber + "]:" + blockHash;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java
new file mode 100644
index 00000000000..2232a3f9a71
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java
@@ -0,0 +1,87 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.block;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.sonar.duplications.statement.Statement;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Creates blocks from statements, each block will contain specified number of statements (<code>blockSize</code>) and 64-bits (8-bytes) hash value.
+ * Hash value computed using
+ * <a href="http://en.wikipedia.org/wiki/Rolling_hash#Rabin-Karp_rolling_hash">Rabin-Karp rolling hash</a> :
+ * <blockquote><pre>
+ * s[0]*31^(blockSize-1) + s[1]*31^(blockSize-2) + ... + s[blockSize-1]
+ * </pre></blockquote>
+ * using <code>long</code> arithmetic, where <code>s[i]</code>
+ * is the hash code of <code>String</code> (which is cached) for statement with number i.
+ * Thus running time - O(N), where N - number of statements.
+ * Implementation fully thread-safe.
+ */
+public class BlockChunker {
+
+ private static final long PRIME_BASE = 31;
+
+ private final int blockSize;
+ private final long power;
+
+ public BlockChunker(int blockSize) {
+ this.blockSize = blockSize;
+
+ long pow = 1;
+ for (int i = 0; i < blockSize - 1; i++) {
+ pow = pow * PRIME_BASE;
+ }
+ this.power = pow;
+ }
+
+ public List<Block> chunk(String resourceId, List<Statement> statements) {
+ if (statements.size() < blockSize) {
+ return Collections.emptyList();
+ }
+ Statement[] statementsArr = statements.toArray(new Statement[statements.size()]);
+ List<Block> blocks = Lists.newArrayListWithCapacity(statementsArr.length - blockSize + 1);
+ long hash = 0;
+ int first = 0;
+ int last = 0;
+ for (; last < blockSize - 1; last++) {
+ hash = hash * PRIME_BASE + statementsArr[last].getValue().hashCode();
+ }
+ for (; last < statementsArr.length; last++, first++) {
+ Statement firstStatement = statementsArr[first];
+ Statement lastStatement = statementsArr[last];
+ // add last statement to hash
+ hash = hash * PRIME_BASE + lastStatement.getValue().hashCode();
+ // create block
+ blocks.add(new Block(resourceId, new ByteArray(hash), first, firstStatement.getStartLine(), lastStatement.getEndLine()));
+ // remove first statement from hash
+ hash -= power * firstStatement.getValue().hashCode();
+ }
+ return blocks;
+ }
+
+ public int getBlockSize() {
+ return blockSize;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/ByteArray.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/ByteArray.java
new file mode 100644
index 00000000000..d3d97a19631
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/ByteArray.java
@@ -0,0 +1,131 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.block;
+
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
+import java.util.Arrays;
+
+/**
+ * Represents hash for {@link Block}. Immutable.
+ *
+ * TODO Godin: would be better to rename to BlockHash,
+ * also maybe we can incorporate it into Block to reduce memory footprint during detection of duplicates
+ */
+public final class ByteArray {
+
+ private final byte[] bytes;
+
+ /**
+ * Cache for hash code.
+ */
+ private int hash;
+
+ public ByteArray(String hexString) {
+ int len = hexString.length();
+ this.bytes = new byte[len / 2];
+ for (int i = 0; i < len; i += 2) {
+ bytes[i / 2] = (byte) ((Character.digit(hexString.charAt(i), 16) << 4) + Character.digit(hexString.charAt(i + 1), 16));
+ }
+ }
+
+ public ByteArray(byte[] bytes) {
+ this.bytes = new byte[bytes.length];
+ System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+ }
+
+ public ByteArray(long value) {
+ this.bytes = new byte[] {
+ (byte) (value >>> 56),
+ (byte) (value >>> 48),
+ (byte) (value >>> 40),
+ (byte) (value >>> 32),
+ (byte) (value >>> 24),
+ (byte) (value >>> 16),
+ (byte) (value >>> 8),
+ (byte) value };
+ }
+
+ public ByteArray(int value) {
+ this.bytes = new byte[] {
+ (byte) (value >>> 24),
+ (byte) (value >>> 16),
+ (byte) (value >>> 8),
+ (byte) value };
+ }
+
+ public ByteArray(int[] intArray) {
+ ByteBuffer bb = ByteBuffer.allocate(intArray.length * 4);
+ for (int i : intArray) {
+ bb.putInt(i);
+ }
+ this.bytes = bb.array();
+ }
+
+ public int[] toIntArray() {
+ int size = (bytes.length / 4) + (bytes.length % 4 == 0 ? 0 : 1); // Pad the size to multiple of 4
+ ByteBuffer bb = ByteBuffer.allocate(size * 4);
+ bb.put(bytes);
+ bb.rewind();
+ IntBuffer ib = bb.asIntBuffer();
+ int[] result = new int[size];
+ ib.get(result);
+ return result;
+ }
+
+ private static final String HEXES = "0123456789abcdef";
+
+ public String toHexString() {
+ StringBuilder hex = new StringBuilder(2 * bytes.length);
+ for (byte b : bytes) {
+ hex.append(HEXES.charAt((b & 0xF0) >> 4)).append(HEXES.charAt((b & 0x0F)));
+ }
+ return hex.toString();
+ }
+
+ @Override
+ public String toString() {
+ return toHexString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ ByteArray other = (ByteArray) o;
+ return Arrays.equals(bytes, other.bytes);
+ }
+
+ @Override
+ public int hashCode() {
+ int h = hash;
+ int len = bytes.length;
+ if (h == 0 && len > 0) {
+ h = Arrays.hashCode(bytes);
+ hash = h;
+ }
+ return h;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/BlocksGroup.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/BlocksGroup.java
new file mode 100644
index 00000000000..55dc41e2298
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/BlocksGroup.java
@@ -0,0 +1,212 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.detector.original;
+
+import java.util.Comparator;
+import java.util.List;
+
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.utils.FastStringComparator;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Set of {@link Block}s, which internally stored as a sorted list.
+ */
+final class BlocksGroup {
+
+ /**
+ * Factory method.
+ *
+ * @return new empty group
+ */
+ public static BlocksGroup empty() {
+ return new BlocksGroup();
+ }
+
+ protected final List<Block> blocks;
+
+ private BlocksGroup() {
+ this.blocks = Lists.newArrayList();
+ }
+
+ public int size() {
+ return blocks.size();
+ }
+
+ /**
+ * @return true, if this group subsumed by specified group
+ * @see #subsumedBy(BlocksGroup, BlocksGroup, int)
+ */
+ public boolean subsumedBy(BlocksGroup other, int indexCorrection) {
+ return subsumedBy(this, other, indexCorrection);
+ }
+
+ /**
+ * @return intersection of this group with specified
+ * @see #intersect(BlocksGroup, BlocksGroup)
+ */
+ public BlocksGroup intersect(BlocksGroup other) {
+ return intersect(this, other);
+ }
+
+ public List<Block[]> pairs(BlocksGroup other, int len) {
+ return pairs(this, other, len);
+ }
+
+ /**
+ * First block from this group with specified resource id.
+ */
+ public Block first(String resourceId) {
+ for (Block block : blocks) {
+ if (resourceId.equals(block.getResourceId())) {
+ return block;
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public String toString() {
+ return blocks.toString();
+ }
+
+ /**
+ * Intersection of two groups is a group, which contains blocks from second group that have corresponding block from first group
+ * with same resource id and with corrected index.
+ */
+ private static BlocksGroup intersect(BlocksGroup group1, BlocksGroup group2) {
+ BlocksGroup intersection = new BlocksGroup();
+ List<Block> list1 = group1.blocks;
+ List<Block> list2 = group2.blocks;
+ int i = 0;
+ int j = 0;
+ while (i < list1.size() && j < list2.size()) {
+ Block block1 = list1.get(i);
+ Block block2 = list2.get(j);
+ int c = RESOURCE_ID_COMPARATOR.compare(block1.getResourceId(), block2.getResourceId());
+ if (c > 0) {
+ j++;
+ continue;
+ }
+ if (c < 0) {
+ i++;
+ continue;
+ }
+ if (c == 0) {
+ c = block1.getIndexInFile() + 1 - block2.getIndexInFile();
+ }
+ if (c == 0) { // list1[i] == list2[j]
+ i++;
+ j++;
+ intersection.blocks.add(block2);
+ }
+ if (c > 0) { // list1[i] > list2[j]
+ j++;
+ }
+ if (c < 0) { // list1[i] < list2[j]
+ i++;
+ }
+ }
+ return intersection;
+ }
+
+ /**
+ * One group is subsumed by another group, when each block from first group has corresponding block from second group
+ * with same resource id and with corrected index.
+ */
+ private static boolean subsumedBy(BlocksGroup group1, BlocksGroup group2, int indexCorrection) {
+ List<Block> list1 = group1.blocks;
+ List<Block> list2 = group2.blocks;
+ int i = 0;
+ int j = 0;
+ while (i < list1.size() && j < list2.size()) {
+ Block block1 = list1.get(i);
+ Block block2 = list2.get(j);
+ int c = RESOURCE_ID_COMPARATOR.compare(block1.getResourceId(), block2.getResourceId());
+ if (c != 0) {
+ j++;
+ continue;
+ }
+ if (c == 0) {
+ c = block1.getIndexInFile() - indexCorrection - block2.getIndexInFile();
+ }
+ if (c < 0) { // list1[i] < list2[j]
+ break;
+ }
+ if (c != 0) { // list1[i] != list2[j]
+ j++;
+ }
+ if (c == 0) { // list1[i] == list2[j]
+ i++;
+ j++;
+ }
+ }
+ return i == list1.size();
+ }
+
+ private static List<Block[]> pairs(BlocksGroup beginGroup, BlocksGroup endGroup, int len) {
+ List<Block[]> result = Lists.newArrayList();
+ List<Block> beginBlocks = beginGroup.blocks;
+ List<Block> endBlocks = endGroup.blocks;
+ int i = 0;
+ int j = 0;
+ while (i < beginBlocks.size() && j < endBlocks.size()) {
+ Block beginBlock = beginBlocks.get(i);
+ Block endBlock = endBlocks.get(j);
+ int c = RESOURCE_ID_COMPARATOR.compare(beginBlock.getResourceId(), endBlock.getResourceId());
+ if (c == 0) {
+ c = beginBlock.getIndexInFile() + len - 1 - endBlock.getIndexInFile();
+ }
+ if (c == 0) {
+ result.add(new Block[] { beginBlock, endBlock });
+ i++;
+ j++;
+ }
+ if (c > 0) {
+ j++;
+ }
+ if (c < 0) {
+ i++;
+ }
+ }
+ return result;
+ }
+
+ private static final Comparator<String> RESOURCE_ID_COMPARATOR = FastStringComparator.INSTANCE;
+
+ /**
+ * Compares {@link Block}s first using {@link Block#getResourceId() resource id} and then using {@link Block#getIndexInFile() index in file}.
+ */
+ public static class BlockComparator implements Comparator<Block> {
+
+ public static final BlockComparator INSTANCE = new BlockComparator();
+
+ public int compare(Block b1, Block b2) {
+ int c = RESOURCE_ID_COMPARATOR.compare(b1.getResourceId(), b2.getResourceId());
+ if (c == 0) {
+ return b1.getIndexInFile() - b2.getIndexInFile();
+ }
+ return c;
+ }
+
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java
new file mode 100644
index 00000000000..3574b2fed2e
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java
@@ -0,0 +1,154 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.detector.original;
+
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import org.sonar.duplications.index.CloneGroup;
+import org.sonar.duplications.index.ClonePart;
+import org.sonar.duplications.utils.FastStringComparator;
+import org.sonar.duplications.utils.SortedListsUtils;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Performs incremental and brute force algorithm in order to filter clones, which are fully covered by other clones.
+ * All clones for filtering must be of the same origin - there is no sanity check on this.
+ * In a worst case it performs O(N*N) comparisons.
+ * <p>
+ * Godin: This implementation was chosen because it simple.
+ * And I wasn't able to find big difference in performance with an interval tree, which we had for the moment of testing.
+ * Whereas in fact I expected that interval tree would be better for this task.
+ * Moreover with interval tree we also can use incremental approach,
+ * but we did not had an implementation with remove operation for the moment of testing.
+ * </p>
+ */
+final class Filter {
+
+ /**
+ * Note that LinkedList should provide better performance here, because of use of operation remove.
+ *
+ * @see #add(CloneGroup)
+ */
+ private final List<CloneGroup> filtered = Lists.newLinkedList();
+
+ /**
+ * @return current results of filtering
+ */
+ public List<CloneGroup> getResult() {
+ return filtered;
+ }
+
+ /**
+ * Running time - O(N*2*C), where N - number of clones, which was found earlier and C - time of {@link #containsIn(CloneGroup, CloneGroup)}.
+ */
+ public void add(CloneGroup current) {
+ Iterator<CloneGroup> i = filtered.iterator();
+ while (i.hasNext()) {
+ CloneGroup earlier = i.next();
+ // Note that following two conditions cannot be true together - proof by contradiction:
+ // let C be the current clone and A and B were found earlier
+ // then since relation is transitive - (A in C) and (C in B) => (A in B)
+ // so A should be filtered earlier
+ if (Filter.containsIn(current, earlier)) {
+ // current clone fully covered by clone, which was found earlier
+ return;
+ }
+ if (Filter.containsIn(earlier, current)) {
+ // current clone fully covers clone, which was found earlier
+ i.remove();
+ }
+ }
+ filtered.add(current);
+ }
+
+ /**
+ * Checks that second clone contains first one.
+ * <p>
+ * Clone A is contained in another clone B, if every part pA from A has part pB in B,
+ * which satisfy the conditions:
+ * <pre>
+ * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
+ * </pre>
+ * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A,
+ * which satisfy the condition:
+ * <pre>
+ * pB.resourceId == pA.resourceId
+ * </pre>
+ * So this relation is:
+ * <ul>
+ * <li>reflexive - A in A</li>
+ * <li>transitive - (A in B) and (B in C) => (A in C)</li>
+ * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li>
+ * </ul>
+ * </p>
+ * <p>
+ * <strong>Important: this method relies on fact that all parts were already sorted by resourceId and unitStart by using
+ * {@link BlocksGroup.BlockComparator}, which uses {@link FastStringComparator} for comparison by resourceId.</strong>
+ * </p>
+ * <p>
+ * Running time - O(|A|+|B|).
+ * </p>
+ */
+ static boolean containsIn(CloneGroup first, CloneGroup second) {
+ if (first.getCloneUnitLength() > second.getCloneUnitLength()) {
+ return false;
+ }
+ List<ClonePart> firstParts = first.getCloneParts();
+ List<ClonePart> secondParts = second.getCloneParts();
+ return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(first.getCloneUnitLength(), second.getCloneUnitLength()))
+ && SortedListsUtils.contains(firstParts, secondParts, RESOURCE_ID_COMPARATOR);
+ }
+
+ private static final Comparator<ClonePart> RESOURCE_ID_COMPARATOR = new Comparator<ClonePart>() {
+ public int compare(ClonePart o1, ClonePart o2) {
+ return FastStringComparator.INSTANCE.compare(o1.getResourceId(), o2.getResourceId());
+ }
+ };
+
+ private static final class ContainsInComparator implements Comparator<ClonePart> {
+ private final int l1, l2;
+
+ public ContainsInComparator(int l1, int l2) {
+ this.l1 = l1;
+ this.l2 = l2;
+ }
+
+ public int compare(ClonePart o1, ClonePart o2) {
+ int c = FastStringComparator.INSTANCE.compare(o1.getResourceId(), o2.getResourceId());
+ if (c == 0) {
+ if (o2.getUnitStart() <= o1.getUnitStart()) {
+ if (o1.getUnitStart() + l1 <= o2.getUnitStart() + l2) {
+ return 0; // match found - stop search
+ } else {
+ return 1; // continue search
+ }
+ } else {
+ return -1; // o1 < o2 by unitStart - stop search
+ }
+ } else {
+ return c;
+ }
+ }
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/OriginalCloneDetectionAlgorithm.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/OriginalCloneDetectionAlgorithm.java
new file mode 100644
index 00000000000..c6eb043d2b1
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/OriginalCloneDetectionAlgorithm.java
@@ -0,0 +1,226 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.detector.original;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.block.ByteArray;
+import org.sonar.duplications.index.CloneGroup;
+import org.sonar.duplications.index.CloneIndex;
+import org.sonar.duplications.index.ClonePart;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+/**
+ * Implementation of algorithm described in paper
+ * <a href="http://www4.in.tum.de/~juergens/publications/icsm2010_crc.pdf">Index-Based Code Clone Detection: Incremental, Distributed, Scalable</a>
+ * by Benjamin Hummel, Elmar Juergens, Michael Conradt and Lars Heinemann.
+ */
+public final class OriginalCloneDetectionAlgorithm {
+
+ /**
+ * Performs detection and returns list of clone groups between file (which represented as a collection of blocks) and index.
+ * Note that this method ignores blocks for this file, that will be retrieved from index.
+ */
+ public static List<CloneGroup> detect(CloneIndex cloneIndex, Collection<Block> fileBlocks) {
+ if (fileBlocks.isEmpty()) {
+ return Collections.EMPTY_LIST;
+ }
+ OriginalCloneDetectionAlgorithm reporter = new OriginalCloneDetectionAlgorithm(cloneIndex);
+ reporter.findClones(fileBlocks);
+ return reporter.filter.getResult();
+ }
+
+ private final CloneIndex cloneIndex;
+
+ private final Filter filter = new Filter();
+
+ private String originResourceId;
+
+ private OriginalCloneDetectionAlgorithm(CloneIndex cloneIndex) {
+ this.cloneIndex = cloneIndex;
+ }
+
+ private BlocksGroup[] createGroups(Collection<Block> fileBlocks) {
+ // 2: let f be the list of tuples corresponding to filename sorted by statement index
+ // either read from the index or calculated on the fly
+ int size = fileBlocks.size();
+
+ // Godin: create one group per unique hash
+ Map<ByteArray, BlocksGroup> groupsByHash = Maps.newHashMap(); // TODO Godin: can we create map with expected size?
+ for (Block fileBlock : fileBlocks) {
+ ByteArray hash = fileBlock.getBlockHash();
+ BlocksGroup sameHash = groupsByHash.get(hash);
+ if (sameHash == null) {
+ sameHash = BlocksGroup.empty();
+ groupsByHash.put(hash, sameHash);
+ }
+ sameHash.blocks.add(fileBlock);
+ }
+
+ // Godin: retrieve blocks from index
+ for (Map.Entry<ByteArray, BlocksGroup> entry : groupsByHash.entrySet()) {
+ ByteArray hash = entry.getKey();
+ BlocksGroup group = entry.getValue();
+ for (Block blockFromIndex : cloneIndex.getBySequenceHash(hash)) {
+ // Godin: skip blocks for this file if they come from index
+ if (!originResourceId.equals(blockFromIndex.getResourceId())) {
+ group.blocks.add(blockFromIndex);
+ }
+ }
+ Collections.sort(group.blocks, BlocksGroup.BlockComparator.INSTANCE);
+ }
+
+ // 3: let c be a list with c(0) = empty
+ BlocksGroup[] sameHashBlocksGroups = new BlocksGroup[size + 2];
+ sameHashBlocksGroups[0] = BlocksGroup.empty();
+ // 4: for i := 1 to length(f) do
+ for (Block fileBlock : fileBlocks) {
+ ByteArray hash = fileBlock.getBlockHash();
+ int i = fileBlock.getIndexInFile() + 1;
+ // 5: retrieve tuples with same sequence hash as f(i)
+ // 6: store this set as c(i)
+ sameHashBlocksGroups[i] = groupsByHash.get(hash);
+ }
+
+ // Godin: allows to report clones at the end of file, because condition at line 13 would be evaluated as true
+ sameHashBlocksGroups[size + 1] = BlocksGroup.empty();
+
+ return sameHashBlocksGroups;
+ }
+
+ private void findClones(Collection<Block> fileBlocks) {
+ originResourceId = fileBlocks.iterator().next().getResourceId();
+
+ BlocksGroup[] sameHashBlocksGroups = createGroups(fileBlocks);
+
+ // 7: for i := 1 to length(c) do
+ for (int i = 1; i < sameHashBlocksGroups.length; i++) {
+ // In the main loop (starting from Line 7), we first check
+ // whether any new clones might start at this position. If there
+ // is only a single tuple with this hash (which has to belong
+ // to the inspected file at the current location) we skip this loop
+ // iteration. The same holds if all tuples at position i have already
+ // been present at position i − 1, as in this case any clone group
+ // found at position i would be included in a clone group starting
+ // at position i − 1.
+
+ // Although we use the subset operator in the
+ // algorithm description, this is not really a subset operation,
+ // as of course the statement index of the tuples in c(i) will be
+ // increased by 1 compared to the corresponding ones in c(i − 1)
+ // and the hash and info fields will differ.
+
+ // 8: if |c(i)| < 2 or c(i) subsumed by c(i - 1) then
+ if (sameHashBlocksGroups[i].size() < 2 || sameHashBlocksGroups[i].subsumedBy(sameHashBlocksGroups[i - 1], 1)) {
+ // 9: continue with next loop iteration
+ continue;
+ }
+
+ // The set a introduced in Line 10 is called the active set and
+ // contains all tuples corresponding to clones which have not yet
+ // been reported. At each iteration of the inner loop the set a
+ // is reduced to tuples which are also present in c(j); again the
+ // intersection operator has to account for the increased statement
+ // index and different hash and info fields. The new value is
+ // stored in a0. Clones are only reported, if tuples are lost in
+ // Line 12, as otherwise all current clones could be prolonged
+ // by one statement. Clone reporting matches tuples that, after
+ // correction of the statement index, appear in both c(i) and a;
+ // each matched pair corresponds to a single clone. Its location
+ // can be extracted from the filename and info fields.
+
+ // 10: let a := c(i)
+ BlocksGroup currentBlocksGroup = sameHashBlocksGroups[i];
+ // 11: for j := i + 1 to length(c) do
+ for (int j = i + 1; j < sameHashBlocksGroups.length; j++) {
+ // 12: let a0 := a intersect c(j)
+ BlocksGroup intersectedBlocksGroup = currentBlocksGroup.intersect(sameHashBlocksGroups[j]);
+
+ // 13: if |a0| < |a| then
+ if (intersectedBlocksGroup.size() < currentBlocksGroup.size()) {
+ // 14: report clones from c(i) to a (see text)
+
+ // One problem of this algorithm is that clone classes with
+ // multiple instances in the same file are encountered and
+ // reported multiple times. Furthermore, when calculating the clone
+ // groups for all files in a system, clone groups will be reported
+ // more than once as well. Both cases can be avoided, by
+ // checking whether the first element of a0 (with respect to a
+ // fixed order) is equal to f(j) and only report in this case.
+
+ Block first = currentBlocksGroup.first(originResourceId);
+ if (first.getIndexInFile() == j - 2) {
+ // Godin: We report clones, which start in i-1 and end in j-2, so length is j-2-(i-1)+1=j-i
+ reportClones(sameHashBlocksGroups[i], currentBlocksGroup, j - i);
+ }
+ }
+ // 15: a := a0
+ currentBlocksGroup = intersectedBlocksGroup;
+
+ // Line 16 early exits the inner loop if either no more clones are starting
+ // from position i (i.e., a is too small), or if all tuples from a
+ // have already been in c(i − 1), corrected for statement index.
+ // In this case they have already been reported in the previous
+ // iteration of the outer loop.
+
+ // IMPORTANT Godin: note that difference in indexes between "a" and "c(i-1)" greater than one,
+ // so method subsumedBy should take this into account
+
+ // 16: if |a| < 2 or a subsumed by c(i-1) then
+ if (currentBlocksGroup.size() < 2 || currentBlocksGroup.subsumedBy(sameHashBlocksGroups[i - 1], j - i + 1)) {
+ // 17: break inner loop
+ break;
+ }
+ }
+ }
+ }
+
+ private void reportClones(BlocksGroup beginGroup, BlocksGroup endGroup, int cloneLength) {
+ List<Block[]> pairs = beginGroup.pairs(endGroup, cloneLength);
+
+ ClonePart origin = null;
+ List<ClonePart> parts = Lists.newArrayList();
+
+ for (int i = 0; i < pairs.size(); i++) {
+ Block[] pair = pairs.get(i);
+ ClonePart part = new ClonePart(pair[0].getResourceId(), pair[0].getIndexInFile(), pair[0].getFirstLineNumber(), pair[1].getLastLineNumber());
+ parts.add(part);
+
+ if (originResourceId.equals(part.getResourceId())) {
+ if (origin == null) {
+ origin = part;
+ } else {
+ if (part.getUnitStart() < origin.getUnitStart()) {
+ origin = part;
+ }
+ }
+ }
+ }
+
+ filter.add(new CloneGroup(cloneLength, origin, parts));
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/AbstractCloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/AbstractCloneIndex.java
new file mode 100644
index 00000000000..88ec3bad6d2
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/AbstractCloneIndex.java
@@ -0,0 +1,24 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.index;
+
+public abstract class AbstractCloneIndex implements CloneIndex {
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java
new file mode 100644
index 00000000000..f6552237322
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java
@@ -0,0 +1,107 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.index;
+
+import java.util.List;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Groups a set of related {@link ClonePart}s.
+ */
+public class CloneGroup {
+
+ private final ClonePart originPart;
+ private final int cloneLength;
+ private final List<ClonePart> parts;
+
+ /**
+ * Cache for hash code.
+ */
+ private int hash;
+
+ public CloneGroup(int cloneLength, ClonePart origin, List<ClonePart> parts) {
+ this.cloneLength = cloneLength;
+ this.originPart = origin;
+ this.parts = ImmutableList.copyOf(parts);
+ }
+
+ public ClonePart getOriginPart() {
+ return originPart;
+ }
+
+ /**
+ * @return clone length in units (not in lines)
+ */
+ public int getCloneUnitLength() {
+ return cloneLength;
+ }
+
+ public List<ClonePart> getCloneParts() {
+ return parts;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ for (ClonePart part : parts) {
+ builder.append(part).append(" - ");
+ }
+ builder.append(cloneLength);
+ return builder.toString();
+ }
+
+ /**
+ * Two groups are equal, if they have same length, same origins and contain same parts in same order.
+ */
+ @Override
+ public boolean equals(Object object) {
+ if (!(object instanceof CloneGroup)) {
+ return false;
+ }
+ CloneGroup another = (CloneGroup) object;
+ if (another.cloneLength != cloneLength || parts.size() != another.parts.size()) {
+ return false;
+ }
+ if (!originPart.equals(another.originPart)) {
+ return false;
+ }
+ boolean result = true;
+ for (int i = 0; i < parts.size(); i++) {
+ result &= another.parts.get(i).equals(parts.get(i));
+ }
+ return result;
+ }
+
+ @Override
+ public int hashCode() {
+ int h = hash;
+ if (h == 0 && cloneLength != 0) {
+ for (ClonePart part : parts) {
+ h = 31 * h + part.hashCode();
+ }
+ h = 31 * h + originPart.hashCode();
+ h = 31 * h + cloneLength;
+ hash = h;
+ }
+ return h;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneIndex.java
new file mode 100644
index 00000000000..b06f6775c42
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneIndex.java
@@ -0,0 +1,48 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.index;
+
+import java.util.Collection;
+
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.block.ByteArray;
+
+public interface CloneIndex {
+
+ /**
+ * Performs search of blocks for specified resource.
+ *
+ * @return collection of blocks from index for specified resource and empty collection if nothing found
+ */
+ Collection<Block> getByResourceId(String resourceId);
+
+ /**
+ * Performs search of blocks for specified hash value.
+ *
+ * @return collection of blocks from index with specified hash and empty collection if nothing found
+ */
+ Collection<Block> getBySequenceHash(ByteArray hash);
+
+ /**
+ * Adds specified block into index.
+ */
+ void insert(Block block);
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java
new file mode 100644
index 00000000000..2f31bf6f93a
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java
@@ -0,0 +1,91 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.index;
+
+public class ClonePart {
+
+ private final String resourceId;
+ private final int unitStart;
+ private final int lineStart;
+ private final int lineEnd;
+
+ /**
+ * Cache for hash code.
+ */
+ private int hash;
+
+ public ClonePart(String resourceId, int unitStart, int lineStart, int lineEnd) {
+ this.resourceId = resourceId;
+ this.unitStart = unitStart;
+ this.lineStart = lineStart;
+ this.lineEnd = lineEnd;
+ }
+
+ public String getResourceId() {
+ return resourceId;
+ }
+
+ public int getUnitStart() {
+ return unitStart;
+ }
+
+ public int getLineStart() {
+ return lineStart;
+ }
+
+ public int getLineEnd() {
+ return lineEnd;
+ }
+
+ public int getLines() {
+ return lineEnd - lineStart + 1;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj instanceof ClonePart) {
+ ClonePart another = (ClonePart) obj;
+ return another.resourceId.equals(resourceId)
+ && another.lineStart == lineStart
+ && another.lineEnd == lineEnd
+ && another.unitStart == unitStart;
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ int h = hash;
+ if (h == 0) {
+ h = resourceId.hashCode();
+ h = 31 * h + lineStart;
+ h = 31 * h + lineEnd;
+ h = 31 * h + unitStart;
+ hash = h;
+ }
+ return h;
+ }
+
+ @Override
+ public String toString() {
+ return "'" + resourceId + "':[" + unitStart + "|" + lineStart + "-" + lineEnd + "]";
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/DataUtils.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/DataUtils.java
new file mode 100644
index 00000000000..65eb1dbb692
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/DataUtils.java
@@ -0,0 +1,116 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.index;
+
+public final class DataUtils {
+
+ public interface Sortable {
+
+ /**
+ * @return the number of elements.
+ */
+ int size();
+
+ /**
+ * Swaps elements in positions i and j.
+ */
+ void swap(int i, int j);
+
+ /**
+ * @return true if element in position i less than in position j.
+ */
+ boolean isLess(int i, int j);
+
+ }
+
+ /**
+ * Value for search must be stored in position {@link Sortable#size() size}.
+ */
+ public static int binarySearch(Sortable data) {
+ int value = data.size();
+ int lower = 0;
+ int upper = data.size();
+ while (lower < upper) {
+ int mid = (lower + upper) >> 1;
+ if (data.isLess(mid, value)) {
+ lower = mid + 1;
+ } else {
+ upper = mid;
+ }
+ }
+ return lower;
+ }
+
+ public static void sort(Sortable data) {
+ quickSort(data, 0, data.size() - 1);
+ }
+
+ private static void bubbleSort(Sortable data, int left, int right) {
+ for (int i = right; i > left; i--) {
+ for (int j = left; j < i; j++) {
+ if (data.isLess(j + 1, j)) {
+ data.swap(j, j + 1);
+ }
+ }
+ }
+ }
+
+ private static int partition(Sortable data, int i, int j) {
+ // can be selected randomly
+ int pivot = i + (j - i) / 2;
+ while (i <= j) {
+ while (data.isLess(i, pivot)) {
+ i++;
+ }
+ while (data.isLess(pivot, j)) {
+ j--;
+ }
+ if (i <= j) {
+ data.swap(i, j);
+ if (i == pivot) {
+ pivot = j;
+ } else if (j == pivot) {
+ pivot = i;
+ }
+ i++;
+ j--;
+ }
+ }
+ return i;
+ }
+
+ private static void quickSort(Sortable data, int low, int high) {
+ if (high - low < 5) {
+ bubbleSort(data, low, high);
+ return;
+ }
+ int i = partition(data, low, high);
+ if (low < i - 1) {
+ quickSort(data, low, i - 1);
+ }
+ if (i < high) {
+ quickSort(data, i, high);
+ }
+ }
+
+ private DataUtils() {
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex.java
new file mode 100644
index 00000000000..1fbce94e817
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex.java
@@ -0,0 +1,84 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.index;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Comparator;
+
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.block.ByteArray;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.TreeMultimap;
+
+public class MemoryCloneIndex implements CloneIndex {
+
+ private final TreeMultimap<String, Block> filenameIndex;
+ private final HashMultimap<ByteArray, Block> sequenceHashIndex;
+
+ private static final class ValueComparator implements Comparator<Block>, Serializable {
+
+ private static final long serialVersionUID = 6048010242032502222L;
+
+ public int compare(Block o1, Block o2) {
+ if (o2.getResourceId().equals(o1.getResourceId())) {
+ return o1.getIndexInFile() - o2.getIndexInFile();
+ }
+ return o1.getResourceId().compareTo(o2.getResourceId());
+ }
+ }
+
+ private static final class KeyComparator implements Comparator<String>, Serializable {
+
+ private static final long serialVersionUID = 8705841881237170539L;
+
+ public int compare(String o1, String o2) {
+ return o1.compareTo(o2);
+ }
+ }
+
+ public MemoryCloneIndex() {
+ filenameIndex = TreeMultimap.create(new KeyComparator(), new ValueComparator());
+ sequenceHashIndex = HashMultimap.create();
+ }
+
+ public Collection<String> getAllUniqueResourceId() {
+ return filenameIndex.keySet();
+ }
+
+ public boolean containsResourceId(String resourceId) {
+ return filenameIndex.containsKey(resourceId);
+ }
+
+ public Collection<Block> getByResourceId(String fileName) {
+ return filenameIndex.get(fileName);
+ }
+
+ public Collection<Block> getBySequenceHash(ByteArray sequenceHash) {
+ return sequenceHashIndex.get(sequenceHash);
+ }
+
+ public void insert(Block tuple) {
+ filenameIndex.put(tuple.getResourceId(), tuple);
+ sequenceHashIndex.put(tuple.getBlockHash(), tuple);
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex2.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex2.java
new file mode 100644
index 00000000000..ae4e9c35da4
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex2.java
@@ -0,0 +1,65 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.index;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.block.ByteArray;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+public class MemoryCloneIndex2 extends AbstractCloneIndex {
+
+ private Map<String, List<Block>> byResource = Maps.newHashMap();
+ private Map<ByteArray, List<Block>> byHash = Maps.newHashMap();
+
+ public Collection<Block> getByResourceId(String resourceId) {
+ return get(byResource, resourceId);
+ }
+
+ public Collection<Block> getBySequenceHash(ByteArray sequenceHash) {
+ return get(byHash, sequenceHash);
+ }
+
+ public void insert(Block block) {
+ put(byResource, block.getResourceId(), block);
+ put(byHash, block.getBlockHash(), block);
+ }
+
+ private static <T> List<Block> get(Map<T, List<Block>> map, T key) {
+ List<Block> blocks = map.get(key);
+ return blocks != null ? blocks : Collections.EMPTY_LIST;
+ }
+
+ private static <T> void put(Map<T, List<Block>> map, T key, Block value) {
+ List<Block> blocks = map.get(key);
+ if (blocks == null) {
+ blocks = Lists.newLinkedList();
+ map.put(key, blocks);
+ }
+ blocks.add(value);
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java
new file mode 100644
index 00000000000..c19bbfdd042
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java
@@ -0,0 +1,280 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.index;
+
+import java.util.Collection;
+import java.util.List;
+
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.block.ByteArray;
+import org.sonar.duplications.utils.FastStringComparator;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Provides an index optimized by memory.
+ * <p>
+ * Each object in Java has an overhead - see
+ * <a href="http://devblog.streamy.com/2009/07/24/determine-size-of-java-object-class/">"HOWTO: Determine the size of a Java Object or Class"</a>.
+ * So to optimize memory consumption, we use flat arrays, however this increases time of queries.
+ * During usual detection of duplicates most time consuming method is a {@link #getByResourceId(String)}:
+ * around 50% of time spent in this class and number of invocations of this method is 1% of total invocations,
+ * however total time spent in this class less than 1 second for small projects and around 2 seconds for projects like JDK.
+ * </p>
+ * <p>
+ * Note that this implementation currently does not support deletion, however it's possible to implement.
+ * </p>
+ */
+public class PackedMemoryCloneIndex extends AbstractCloneIndex {
+
+ private static final int DEFAULT_INITIAL_CAPACITY = 1024;
+
+ private static final int BLOCK_INTS = 3;
+
+ private final int hashInts;
+
+ private final int blockInts;
+
+ /**
+ * Indicates that index requires sorting to perform queries.
+ */
+ private boolean sorted;
+
+ /**
+ * Current number of blocks in index.
+ */
+ private int size;
+
+ private String[] resourceIds;
+ private int[] blockData;
+
+ private int[] resourceIdsIndex;
+
+ public PackedMemoryCloneIndex() {
+ this(8, DEFAULT_INITIAL_CAPACITY);
+ }
+
+ /**
+ * @param hashBytes size of hash in bytes
+ * @param initialCapacity the initial capacity
+ */
+ public PackedMemoryCloneIndex(int hashBytes, int initialCapacity) {
+ this.sorted = false;
+ this.hashInts = hashBytes / 4;
+ this.blockInts = hashInts + BLOCK_INTS;
+ this.size = 0;
+ this.resourceIds = new String[initialCapacity];
+ this.blockData = new int[initialCapacity * blockInts];
+ this.resourceIdsIndex = new int[initialCapacity];
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * <strong>Note that this implementation does not guarantee that blocks would be sorted by index.</strong>
+ * </p>
+ */
+ public Collection<Block> getByResourceId(String resourceId) {
+ ensureSorted();
+
+ // prepare resourceId for binary search
+ resourceIds[size] = resourceId;
+ resourceIdsIndex[size] = size;
+
+ int index = DataUtils.binarySearch(byResourceId);
+
+ List<Block> result = Lists.newArrayList();
+ int realIndex = resourceIdsIndex[index];
+ while (index < size && FastStringComparator.INSTANCE.compare(resourceIds[realIndex], resourceId) == 0) {
+ // extract block (note that there is no need to extract resourceId)
+ int offset = realIndex * blockInts;
+ int[] hash = new int[hashInts];
+ for (int j = 0; j < hashInts; j++) {
+ hash[j] = blockData[offset++];
+ }
+ int indexInFile = blockData[offset++];
+ int firstLineNumber = blockData[offset++];
+ int lastLineNumber = blockData[offset];
+
+ result.add(new Block(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber));
+
+ index++;
+ realIndex = resourceIdsIndex[index];
+ }
+ return result;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public Collection<Block> getBySequenceHash(ByteArray sequenceHash) {
+ ensureSorted();
+
+ // prepare hash for binary search
+ int[] hash = sequenceHash.toIntArray();
+ if (hash.length != hashInts) {
+ throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length);
+ }
+ int offset = size * blockInts;
+ for (int i = 0; i < hashInts; i++) {
+ blockData[offset++] = hash[i];
+ }
+
+ int index = DataUtils.binarySearch(byBlockHash);
+
+ List<Block> result = Lists.newArrayList();
+ while (index < size && !isLessByHash(size, index)) {
+ // extract block (note that there is no need to extract hash)
+ String resourceId = resourceIds[index];
+ offset = index * blockInts + hashInts;
+ int indexInFile = blockData[offset++];
+ int firstLineNumber = blockData[offset++];
+ int lastLineNumber = blockData[offset];
+
+ result.add(new Block(resourceId, sequenceHash, indexInFile, firstLineNumber, lastLineNumber));
+ index++;
+ }
+ return result;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * <strong>Note that this implementation allows insertion of two blocks with same index for one resource.</strong>
+ * </p>
+ */
+ public void insert(Block block) {
+ sorted = false;
+ ensureCapacity();
+
+ resourceIds[size] = block.getResourceId();
+
+ int[] hash = block.getBlockHash().toIntArray();
+ if (hash.length != hashInts) {
+ throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length);
+ }
+ int offset = size * blockInts;
+ for (int i = 0; i < hashInts; i++) {
+ blockData[offset++] = hash[i];
+ }
+ blockData[offset++] = block.getIndexInFile();
+ blockData[offset++] = block.getFirstLineNumber();
+ blockData[offset] = block.getLastLineNumber();
+
+ size++;
+ }
+
+ /**
+ * Increases the capacity, if necessary.
+ */
+ private void ensureCapacity() {
+ if (size < resourceIds.length) {
+ return;
+ }
+ int newCapacity = (resourceIds.length * 3) / 2 + 1;
+ // Increase size of resourceIds
+ String[] oldResourceIds = resourceIds;
+ resourceIds = new String[newCapacity];
+ System.arraycopy(oldResourceIds, 0, resourceIds, 0, oldResourceIds.length);
+ // Increase size of blockData
+ int[] oldBlockData = blockData;
+ blockData = new int[newCapacity * blockInts];
+ System.arraycopy(oldBlockData, 0, blockData, 0, oldBlockData.length);
+ // Increase size of byResourceIndices (no need to copy old, because would be restored in method ensureSorted)
+ resourceIdsIndex = new int[newCapacity];
+ sorted = false;
+ }
+
+ /**
+ * Performs sorting, if necessary.
+ */
+ private void ensureSorted() {
+ if (sorted) {
+ return;
+ }
+
+ ensureCapacity();
+
+ DataUtils.sort(byBlockHash);
+ for (int i = 0; i < size; i++) {
+ resourceIdsIndex[i] = i;
+ }
+ DataUtils.sort(byResourceId);
+
+ sorted = true;
+ }
+
+ private boolean isLessByHash(int i, int j) {
+ i *= blockInts;
+ j *= blockInts;
+ for (int k = 0; k < hashInts; k++, i++, j++) {
+ if (blockData[i] < blockData[j]) {
+ return true;
+ }
+ if (blockData[i] > blockData[j]) {
+ return false;
+ }
+ }
+ return false;
+ }
+
+ private final DataUtils.Sortable byBlockHash = new DataUtils.Sortable() {
+ public void swap(int i, int j) {
+ String tmp = resourceIds[i];
+ resourceIds[i] = resourceIds[j];
+ resourceIds[j] = tmp;
+
+ i *= blockInts;
+ j *= blockInts;
+ for (int k = 0; k < blockInts; k++, i++, j++) {
+ int x = blockData[i];
+ blockData[i] = blockData[j];
+ blockData[j] = x;
+ }
+ }
+
+ public boolean isLess(int i, int j) {
+ return isLessByHash(i, j);
+ }
+
+ public int size() {
+ return size;
+ }
+ };
+
+ private final DataUtils.Sortable byResourceId = new DataUtils.Sortable() {
+ public void swap(int i, int j) {
+ int tmp = resourceIdsIndex[i];
+ resourceIdsIndex[i] = resourceIdsIndex[j];
+ resourceIdsIndex[j] = tmp;
+ }
+
+ public boolean isLess(int i, int j) {
+ String s1 = resourceIds[resourceIdsIndex[i]];
+ String s2 = resourceIds[resourceIdsIndex[j]];
+ return FastStringComparator.INSTANCE.compare(s1, s2) < 0;
+ }
+
+ public int size() {
+ return size;
+ }
+ };
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaStatementBuilder.java b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaStatementBuilder.java
new file mode 100644
index 00000000000..99a74d26927
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaStatementBuilder.java
@@ -0,0 +1,50 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.java;
+
+import org.sonar.duplications.statement.StatementChunker;
+
+import static org.sonar.duplications.statement.TokenMatcherFactory.*;
+
+public final class JavaStatementBuilder {
+
+ private JavaStatementBuilder() {
+ }
+
+ public static StatementChunker build() {
+ return StatementChunker.builder()
+ .ignore(from("import"), to(";"))
+ .ignore(from("package"), to(";"))
+ .ignore(token("}"))
+ .ignore(token("{"))
+ .statement(from("@"), anyToken(), opt(bridge("(", ")")))
+ .statement(from("do"))
+ .statement(from("if"), bridge("(", ")"))
+ .statement(from("else"), token("if"), bridge("(", ")"))
+ .statement(from("else"))
+ .statement(from("for"), bridge("(", ")"))
+ .statement(from("while"), bridge("(", ")"))
+ .statement(from("case"), to(":"))
+ .statement(from("default"), to(":"))
+ .statement(to(";", "{", "}"), forgetLastToken())
+ .build();
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java
new file mode 100644
index 00000000000..d9d922fcb27
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java
@@ -0,0 +1,74 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.java;
+
+import org.sonar.duplications.token.TokenChunker;
+
+/**
+ * See <a href="http://java.sun.com/docs/books/jls/third_edition/html/lexical.html">The Java Language Specification, Third Edition: Lexical Structure</a>
+ *
+ * <p>
+ * We decided to use dollar sign as a prefix for normalization, even if it can be a part of an identifier,
+ * because according to Java Language Specification it supposed to be used only in mechanically generated source code.
+ * Thus probability to find it within a normal code should be low.
+ * </p>
+ */
+public final class JavaTokenProducer {
+
+ private JavaTokenProducer() {
+ }
+
+ private static final String NORMALIZED_CHARACTER_LITERAL = "$CHARS";
+ private static final String NORMALIZED_NUMERIC_LITERAL = "$NUMBER";
+
+ private static final String EXP = "([Ee][+-]?+[0-9]++)";
+ private static final String BINARY_EXP = "([Pp][+-]?+[0-9]++)";
+
+ private static final String FLOAT_SUFFIX = "[fFdD]";
+ private static final String INT_SUFFIX = "[lL]";
+
+ public static TokenChunker build() {
+ return TokenChunker.builder()
+ // White Space
+ .ignore("\\s")
+ // Comments
+ .ignore("//[^\\n\\r]*+")
+ .ignore("/\\*[\\s\\S]*?\\*/")
+ // String Literals
+ .token("\"([^\"\\\\]*+(\\\\[\\s\\S])?+)*+\"", NORMALIZED_CHARACTER_LITERAL)
+ // Character Literals
+ .token("'([^'\\n\\\\]*+(\\\\.)?+)*+'", NORMALIZED_CHARACTER_LITERAL)
+ // Identifiers, Keywords, Boolean Literals, The Null Literal
+ .token("\\p{javaJavaIdentifierStart}++\\p{javaJavaIdentifierPart}*+")
+ // Floating-Point Literals
+ .token("[0-9]++\\.([0-9]++)?+" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL)
+ .token("\\.[0-9]++" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL)
+ .token("[0-9]++" + EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL)
+ .token("0[xX][0-9a-fA-F]++\\.[0-9a-fA-F]*+" + BINARY_EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL)
+ .token("0[xX][0-9a-fA-F]++" + BINARY_EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL)
+ // Integer Literals
+ .token("0[xX][0-9a-fA-F]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL)
+ .token("[0-9]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL)
+ // Any other character
+ .token(".")
+ .build();
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java
new file mode 100644
index 00000000000..224e9a7c01d
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java
@@ -0,0 +1,96 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement;
+
+import java.util.List;
+
+import org.sonar.duplications.token.Token;
+
+public class Statement {
+
+ private final int startLine;
+ private final int endLine;
+ private final String value;
+
+ /**
+ * Cache for hash code.
+ */
+ private int hash;
+
+ public Statement(int startLine, int endLine, String value) {
+ this.startLine = startLine;
+ this.endLine = endLine;
+ this.value = value;
+ }
+
+ public Statement(List<Token> tokens) {
+ if (tokens == null || tokens.isEmpty()) {
+ throw new IllegalArgumentException("A statement can't be initialized with an empty list of tokens");
+ }
+ StringBuilder sb = new StringBuilder();
+ for (Token token : tokens) {
+ sb.append(token.getValue());
+ }
+ this.value = sb.toString();
+ this.startLine = tokens.get(0).getLine();
+ this.endLine = tokens.get(tokens.size() - 1).getLine();
+ }
+
+ public int getStartLine() {
+ return startLine;
+ }
+
+ public int getEndLine() {
+ return endLine;
+ }
+
+ public String getValue() {
+ return value;
+ }
+
+ @Override
+ public int hashCode() {
+ int h = hash;
+ if (h == 0) {
+ h = value.hashCode();
+ h = 31 * h + startLine;
+ h = 31 * h + endLine;
+ hash = h;
+ }
+ return h;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof Statement)) {
+ return false;
+ }
+ Statement other = (Statement) obj;
+ return startLine == other.startLine
+ && endLine == other.endLine
+ && value.equals(other.value);
+ }
+
+ @Override
+ public String toString() {
+ return "[" + getStartLine() + "-" + getEndLine() + "] [" + getValue() + "]";
+ }
+
+} \ No newline at end of file
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannel.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannel.java
new file mode 100644
index 00000000000..fe12fdf7c76
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannel.java
@@ -0,0 +1,67 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.sonar.duplications.statement.matcher.TokenMatcher;
+import org.sonar.duplications.token.Token;
+import org.sonar.duplications.token.TokenQueue;
+
+public final class StatementChannel {
+
+ private final TokenMatcher[] tokenMatchers;
+ private final boolean blackHole;
+
+ public static StatementChannel create(TokenMatcher... tokenMatchers) {
+ return new StatementChannel(false, tokenMatchers);
+ }
+
+ public static StatementChannel createBlackHole(TokenMatcher... tokenMatchers) {
+ return new StatementChannel(true, tokenMatchers);
+ }
+
+ private StatementChannel(boolean blackHole, TokenMatcher... tokenMatchers) {
+ if (tokenMatchers == null || tokenMatchers.length == 0) {
+ throw new IllegalArgumentException();
+ }
+ this.blackHole = blackHole;
+ this.tokenMatchers = tokenMatchers;
+ }
+
+ public boolean consume(TokenQueue tokenQueue, List<Statement> output) {
+ List<Token> matchedTokenList = new ArrayList<Token>();
+ for (TokenMatcher tokenMatcher : tokenMatchers) {
+ if (!tokenMatcher.matchToken(tokenQueue, matchedTokenList)) {
+ tokenQueue.pushForward(matchedTokenList);
+ return false;
+ }
+ }
+
+ // all matchers were successful, so now build the statement
+ // matchedTokenList.size() check is for case with ForgiveLastTokenMatcher
+ if (!blackHole && !matchedTokenList.isEmpty()) {
+ output.add(new Statement(matchedTokenList));
+ }
+ return true;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannelDisptacher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannelDisptacher.java
new file mode 100644
index 00000000000..c6020857636
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannelDisptacher.java
@@ -0,0 +1,54 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+
+package org.sonar.duplications.statement;
+
+import java.util.List;
+
+import org.sonar.duplications.token.Token;
+import org.sonar.duplications.token.TokenQueue;
+
+public class StatementChannelDisptacher {
+
+ private final StatementChannel[] channels;
+
+ public StatementChannelDisptacher(List<StatementChannel> channels) {
+ this.channels = channels.toArray(new StatementChannel[channels.size()]);
+ }
+
+ public boolean consume(TokenQueue tokenQueue, List<Statement> statements) {
+ Token nextToken = tokenQueue.peek();
+ while (nextToken != null) {
+ boolean channelConsumed = false;
+ for (StatementChannel channel : channels) {
+ if (channel.consume(tokenQueue, statements)) {
+ channelConsumed = true;
+ break;
+ }
+ }
+ if (!channelConsumed) {
+ throw new IllegalStateException("None of the statement channel has been able to consume token: " + nextToken);
+ }
+ nextToken = tokenQueue.peek();
+ }
+ return true;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChunker.java
new file mode 100644
index 00000000000..aeced1283b1
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChunker.java
@@ -0,0 +1,95 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.sonar.duplications.DuplicationsException;
+import org.sonar.duplications.statement.matcher.TokenMatcher;
+import org.sonar.duplications.token.TokenQueue;
+
+public final class StatementChunker {
+
+ private final StatementChannelDisptacher channelDispatcher;
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ private StatementChunker(Builder builder) {
+ this.channelDispatcher = builder.getChannelDispatcher();
+ }
+
+ public List<Statement> chunk(TokenQueue tokenQueue) {
+ if (tokenQueue == null) {
+ throw new IllegalArgumentException();
+ }
+ List<Statement> statements = new ArrayList<Statement>();
+ try {
+ channelDispatcher.consume(tokenQueue, statements);
+ return statements;
+ } catch (Exception e) {
+ throw new DuplicationsException("Unable to build statement from token : " + tokenQueue.peek(), e);
+ }
+ }
+
+ /**
+ * Note that order is important, e.g.
+ * <code>statement(token(A)).ignore(token(A))</code> for the input sequence "A" will produce statement, whereas
+ * <code>ignore(token(A)).statement(token(A))</code> will not.
+ */
+ public static final class Builder {
+
+ private List<StatementChannel> channels = new ArrayList<StatementChannel>();
+
+ private Builder() {
+ }
+
+ public StatementChunker build() {
+ return new StatementChunker(this);
+ }
+
+ /**
+ * Defines that sequence of tokens must be ignored, if it matches specified list of matchers.
+ *
+ * @see TokenMatcherFactory
+ */
+ public Builder ignore(TokenMatcher... matchers) {
+ channels.add(StatementChannel.createBlackHole(matchers));
+ return this;
+ }
+
+ /**
+ * Defines that sequence of tokens, which is matched specified list of matchers, is a statement.
+ *
+ * @see TokenMatcherFactory
+ */
+ public Builder statement(TokenMatcher... matchers) {
+ channels.add(StatementChannel.create(matchers));
+ return this;
+ }
+
+ private StatementChannelDisptacher getChannelDispatcher() {
+ return new StatementChannelDisptacher(channels);
+ }
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/TokenMatcherFactory.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/TokenMatcherFactory.java
new file mode 100644
index 00000000000..7f0b3e15a10
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/TokenMatcherFactory.java
@@ -0,0 +1,65 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement;
+
+import org.sonar.duplications.statement.matcher.AnyTokenMatcher;
+import org.sonar.duplications.statement.matcher.BridgeTokenMatcher;
+import org.sonar.duplications.statement.matcher.ExactTokenMatcher;
+import org.sonar.duplications.statement.matcher.ForgetLastTokenMatcher;
+import org.sonar.duplications.statement.matcher.OptTokenMatcher;
+import org.sonar.duplications.statement.matcher.TokenMatcher;
+import org.sonar.duplications.statement.matcher.UptoTokenMatcher;
+
+public final class TokenMatcherFactory {
+
+ private TokenMatcherFactory() {
+ }
+
+ public static TokenMatcher from(String token) {
+ return new ExactTokenMatcher(token);
+ }
+
+ public static TokenMatcher to(String... tokens) {
+ return new UptoTokenMatcher(tokens);
+ }
+
+ public static TokenMatcher bridge(String lToken, String rToken) {
+ return new BridgeTokenMatcher(lToken, rToken);
+ }
+
+ public static TokenMatcher anyToken() {
+ // TODO Godin: we can return singleton instance
+ return new AnyTokenMatcher();
+ }
+
+ public static TokenMatcher opt(TokenMatcher optMatcher) {
+ return new OptTokenMatcher(optMatcher);
+ }
+
+ public static TokenMatcher forgetLastToken() {
+ // TODO Godin: we can return singleton instance
+ return new ForgetLastTokenMatcher();
+ }
+
+ public static TokenMatcher token(String token) {
+ return new ExactTokenMatcher(token);
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/AnyTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/AnyTokenMatcher.java
new file mode 100644
index 00000000000..44fd0db7f3e
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/AnyTokenMatcher.java
@@ -0,0 +1,41 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement.matcher;
+
+import java.util.List;
+
+import org.sonar.duplications.token.Token;
+import org.sonar.duplications.token.TokenQueue;
+
+/**
+ * Consumes any token, but only one.
+ */
+public class AnyTokenMatcher extends TokenMatcher {
+
+ /**
+ * @return always true
+ */
+ @Override
+ public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) {
+ matchedTokenList.add(tokenQueue.poll());
+ return true;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/BridgeTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/BridgeTokenMatcher.java
new file mode 100644
index 00000000000..490848e9f04
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/BridgeTokenMatcher.java
@@ -0,0 +1,64 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement.matcher;
+
+import java.util.List;
+
+import org.sonar.duplications.token.Token;
+import org.sonar.duplications.token.TokenQueue;
+
+/**
+ * Consumes everything between pair of tokens.
+ */
+public class BridgeTokenMatcher extends TokenMatcher {
+
+ private final String lToken;
+ private final String rToken;
+
+ public BridgeTokenMatcher(String lToken, String rToken) {
+ if (lToken == null || rToken == null) {
+ throw new IllegalArgumentException();
+ }
+ this.lToken = lToken;
+ this.rToken = rToken;
+ }
+
+ @Override
+ public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) {
+ if (!tokenQueue.isNextTokenValue(lToken)) {
+ return false;
+ }
+ int stack = 0;
+ while (tokenQueue.peek() != null) {
+ Token token = tokenQueue.poll();
+ if (lToken.equals(token.getValue())) {
+ stack++;
+ } else if (rToken.equals(token.getValue())) {
+ stack--;
+ }
+ matchedTokenList.add(token);
+ if (stack == 0) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ExactTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ExactTokenMatcher.java
new file mode 100644
index 00000000000..a21b8ba8c82
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ExactTokenMatcher.java
@@ -0,0 +1,50 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement.matcher;
+
+import java.util.List;
+
+import org.sonar.duplications.token.Token;
+import org.sonar.duplications.token.TokenQueue;
+
+/**
+ * Consumes only one specified token.
+ */
+public class ExactTokenMatcher extends TokenMatcher {
+
+ private final String tokenToMatch;
+
+ public ExactTokenMatcher(String tokenToMatch) {
+ if (tokenToMatch == null) {
+ throw new IllegalArgumentException();
+ }
+ this.tokenToMatch = tokenToMatch;
+ }
+
+ @Override
+ public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) {
+ if (tokenQueue.isNextTokenValue(tokenToMatch)) {
+ matchedTokenList.add(tokenQueue.poll());
+ return true;
+ }
+ return false;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ForgetLastTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ForgetLastTokenMatcher.java
new file mode 100644
index 00000000000..06a8e7d801e
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ForgetLastTokenMatcher.java
@@ -0,0 +1,41 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement.matcher;
+
+import java.util.List;
+
+import org.sonar.duplications.token.Token;
+import org.sonar.duplications.token.TokenQueue;
+
+/**
+ * Forgets token, which was consumed last.
+ */
+public class ForgetLastTokenMatcher extends TokenMatcher {
+
+ /**
+ * @return always true
+ */
+ @Override
+ public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) {
+ matchedTokenList.remove(matchedTokenList.size() - 1);
+ return true;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/OptTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/OptTokenMatcher.java
new file mode 100644
index 00000000000..a76ad0fdc0f
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/OptTokenMatcher.java
@@ -0,0 +1,50 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement.matcher;
+
+import java.util.List;
+
+import org.sonar.duplications.token.Token;
+import org.sonar.duplications.token.TokenQueue;
+
+/**
+ * Delegates consumption to another matcher.
+ */
+public class OptTokenMatcher extends TokenMatcher {
+
+ private final TokenMatcher matcher;
+
+ public OptTokenMatcher(TokenMatcher matcher) {
+ if (matcher == null) {
+ throw new IllegalArgumentException();
+ }
+ this.matcher = matcher;
+ }
+
+ /**
+ * @return always true
+ */
+ @Override
+ public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) {
+ matcher.matchToken(tokenQueue, matchedTokenList);
+ return true;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/TokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/TokenMatcher.java
new file mode 100644
index 00000000000..4bc576e761a
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/TokenMatcher.java
@@ -0,0 +1,36 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement.matcher;
+
+import java.util.List;
+
+import org.sonar.duplications.token.Token;
+import org.sonar.duplications.token.TokenQueue;
+
+public abstract class TokenMatcher {
+
+ /**
+ * @param tokenQueue queue of tokens for consumption, which contains at least one element
+ * @param matchedTokenList list to populate by tokens, which were consumed
+ * @return true if tokens were consumed successfully
+ */
+ public abstract boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList);
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/UptoTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/UptoTokenMatcher.java
new file mode 100644
index 00000000000..729d8a4b064
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/UptoTokenMatcher.java
@@ -0,0 +1,61 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.statement.matcher;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.sonar.duplications.token.Token;
+import org.sonar.duplications.token.TokenQueue;
+
+/**
+ * Consumes everything up to one of the specified tokens.
+ */
+public class UptoTokenMatcher extends TokenMatcher {
+
+ private final Set<String> uptoMatchTokens = new HashSet<String>();
+
+ public UptoTokenMatcher(String[] uptoMatchTokens) {
+ if (uptoMatchTokens == null) {
+ throw new IllegalArgumentException();
+ }
+ if (uptoMatchTokens.length == 0) {
+ // otherwise we will always try to consume everything, but will never succeed
+ throw new IllegalArgumentException();
+ }
+ for (String uptoMatchToken : uptoMatchTokens) {
+ this.uptoMatchTokens.add(uptoMatchToken);
+ }
+ }
+
+ @Override
+ public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) {
+ do {
+ Token token = tokenQueue.poll();
+ matchedTokenList.add(token);
+ if (uptoMatchTokens.contains(token.getValue())) {
+ return true;
+ }
+ } while (tokenQueue.peek() != null);
+ return false;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/package-info.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/package-info.java
new file mode 100644
index 00000000000..176f1c5df22
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/package-info.java
@@ -0,0 +1,27 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+
+/**
+ * Provides a basic framework to create list of statements from list of tokens.
+ *
+ * The entry point of this framework is the {@link org.sonar.duplications.statement.StatementChunker} class.
+ */
+package org.sonar.duplications.statement;
+
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/BlackHoleTokenChannel.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/BlackHoleTokenChannel.java
new file mode 100644
index 00000000000..560bce4301e
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/BlackHoleTokenChannel.java
@@ -0,0 +1,35 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.token;
+
+import org.sonar.channel.RegexChannel;
+
+class BlackHoleTokenChannel extends RegexChannel<TokenQueue> {
+
+ public BlackHoleTokenChannel(String regex) {
+ super(regex);
+ }
+
+ @Override
+ protected void consume(CharSequence token, TokenQueue output) {
+ // do nothing
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/Token.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/Token.java
new file mode 100644
index 00000000000..aa251d8b84f
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/Token.java
@@ -0,0 +1,77 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.token;
+
+public class Token {
+
+ private final int line;
+ private final int column;
+ private final String value;
+
+ /**
+ * Cache for hash code.
+ */
+ private int hash;
+
+ public Token(String value, int line, int column) {
+ this.value = value;
+ this.column = column;
+ this.line = line;
+ }
+
+ public int getLine() {
+ return line;
+ }
+
+ public int getColumn() {
+ return column;
+ }
+
+ public String getValue() {
+ return value;
+ }
+
+ @Override
+ public boolean equals(Object object) {
+ if (object instanceof Token) {
+ Token anotherToken = (Token) object;
+ return anotherToken.line == line && anotherToken.column == column && anotherToken.value.equals(value);
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ int h = hash;
+ if (h == 0) {
+ h = value.hashCode();
+ h = 31 * h + line;
+ h = 31 * h + column;
+ hash = h;
+ }
+ return h;
+ }
+
+ @Override
+ public String toString() {
+ return "'" + value + "'[" + line + "," + column + "]";
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChannel.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChannel.java
new file mode 100644
index 00000000000..11e93b326cf
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChannel.java
@@ -0,0 +1,59 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.token;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.sonar.channel.Channel;
+import org.sonar.channel.CodeBuffer.Cursor;
+import org.sonar.channel.CodeReader;
+
+class TokenChannel extends Channel<TokenQueue> {
+
+ private final StringBuilder tmpBuilder = new StringBuilder();
+ private final Matcher matcher;
+ private String normalizationValue;
+
+ public TokenChannel(String regex) {
+ matcher = Pattern.compile(regex).matcher("");
+ }
+
+ public TokenChannel(String regex, String normalizationValue) {
+ this(regex);
+ this.normalizationValue = normalizationValue;
+ }
+
+ @Override
+ public boolean consume(CodeReader code, TokenQueue output) {
+ if (code.popTo(matcher, tmpBuilder) > 0) {
+ Cursor previousCursor = code.getPreviousCursor(); // see SONAR-2499
+ if (normalizationValue != null) {
+ output.add(new Token(normalizationValue, previousCursor.getLine(), previousCursor.getColumn()));
+ } else {
+ output.add(new Token(tmpBuilder.toString(), previousCursor.getLine(), previousCursor.getColumn()));
+ }
+ tmpBuilder.setLength(0); // Godin: note that other channels use method delete in order to do the same thing
+ return true;
+ }
+ return false;
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java
new file mode 100644
index 00000000000..bb9f5d3cf98
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java
@@ -0,0 +1,141 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.token;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.sonar.channel.Channel;
+import org.sonar.channel.ChannelDispatcher;
+import org.sonar.channel.CodeReader;
+import org.sonar.channel.CodeReaderConfiguration;
+import org.sonar.duplications.DuplicationsException;
+
+public final class TokenChunker {
+
+ /**
+ * Some files can have large tokens (e.g. javadocs), so large buffer required.
+ *
+ * @see CodeReaderConfiguration#DEFAULT_BUFFER_CAPACITY
+ */
+ private final static int BUFFER_CAPACITY = 80000;
+
+ private final Charset charset;
+ private final ChannelDispatcher<TokenQueue> channelDispatcher;
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ private TokenChunker(Builder builder) {
+ this.charset = builder.charset;
+ this.channelDispatcher = builder.getChannelDispatcher();
+ }
+
+ public TokenQueue chunk(String sourceCode) {
+ return chunk(new StringReader(sourceCode));
+ }
+
+ public TokenQueue chunk(File file) {
+ InputStreamReader reader = null;
+ try {
+ reader = new InputStreamReader(new FileInputStream(file), charset);
+ return chunk(reader);
+ } catch (Exception e) {
+ throw new DuplicationsException("Unable to lex file : " + file.getAbsolutePath(), e);
+ } finally {
+ IOUtils.closeQuietly(reader);
+ }
+ }
+
+ public TokenQueue chunk(Reader reader) {
+ CodeReaderConfiguration codeReaderConfiguration = new CodeReaderConfiguration();
+ codeReaderConfiguration.setBufferCapacity(BUFFER_CAPACITY);
+ CodeReader code = new CodeReader(reader, codeReaderConfiguration);
+ TokenQueue queue = new TokenQueue();
+ try {
+ channelDispatcher.consume(code, queue);
+ return queue;
+ } catch (Exception e) {
+ throw new DuplicationsException("Unable to lex source code at line : " + code.getLinePosition() + " and column : "
+ + code.getColumnPosition(), e);
+ }
+ }
+
+ /**
+ * Note that order is important, e.g.
+ * <code>token("A").ignore("A")</code> for the input string "A" will produce token, whereas
+ * <code>ignore("A").token("A")</code> will not.
+ */
+ public static final class Builder {
+
+ private List<Channel> channels = new ArrayList<Channel>();
+ private Charset charset = Charset.defaultCharset();
+
+ private Builder() {
+ }
+
+ public TokenChunker build() {
+ return new TokenChunker(this);
+ }
+
+ /**
+ * Defines that sequence of characters must be ignored, if it matches specified regular expression.
+ */
+ public Builder ignore(String regularExpression) {
+ channels.add(new BlackHoleTokenChannel(regularExpression));
+ return this;
+ }
+
+ /**
+ * Defines that sequence of characters, which is matched specified regular expression, is a token.
+ */
+ public Builder token(String regularExpression) {
+ channels.add(new TokenChannel(regularExpression));
+ return this;
+ }
+
+ /**
+ * Defines that sequence of characters, which is matched specified regular expression, is a token with specified value.
+ */
+ public Builder token(String regularExpression, String normalizationValue) {
+ channels.add(new TokenChannel(regularExpression, normalizationValue));
+ return this;
+ }
+
+ private ChannelDispatcher<TokenQueue> getChannelDispatcher() {
+ return new ChannelDispatcher<TokenQueue>(channels);
+ }
+
+ public Builder setCharset(Charset charset) {
+ this.charset = charset;
+ return this;
+ }
+
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenQueue.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenQueue.java
new file mode 100644
index 00000000000..6fcd544e27e
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenQueue.java
@@ -0,0 +1,85 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.token;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+
+public class TokenQueue implements Iterable<Token> {
+
+ private final LinkedList<Token> tokenQueue;
+
+ public TokenQueue(List<Token> tokenList) {
+ tokenQueue = new LinkedList<Token>(tokenList);
+ }
+
+ public TokenQueue() {
+ tokenQueue = new LinkedList<Token>();
+ }
+
+ /**
+ * Retrieves, but does not remove, token from this queue.
+ *
+ * @return token from this queue, or <tt>null</tt> if this queue is empty.
+ */
+ public Token peek() {
+ return tokenQueue.peek();
+ }
+
+ /**
+ * Retrieves and removes token from this queue.
+ *
+ * @return token from this queue, or <tt>null</tt> if this queue is empty.
+ */
+ public Token poll() {
+ return tokenQueue.poll();
+ }
+
+ public int size() {
+ return tokenQueue.size();
+ }
+
+ public void add(Token token) {
+ tokenQueue.addLast(token);
+ }
+
+ public boolean isNextTokenValue(String expectedValue) {
+ Token nextToken = tokenQueue.peek();
+ if (nextToken == null) {
+ // queue is empty
+ return false;
+ }
+ return nextToken.getValue().equals(expectedValue);
+ }
+
+ public Iterator<Token> iterator() {
+ return tokenQueue.iterator();
+ }
+
+ public void pushForward(List<Token> matchedTokenList) {
+ ListIterator<Token> iter = matchedTokenList.listIterator(matchedTokenList.size());
+ while (iter.hasPrevious()) {
+ tokenQueue.addFirst(iter.previous());
+ }
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/package-info.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/package-info.java
new file mode 100644
index 00000000000..a34f69058bc
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/package-info.java
@@ -0,0 +1,27 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+
+/**
+ * Provides a basic framework to sequentially read any kind of character stream and create list of tokens.
+ *
+ * The entry point of this framework is the {@link org.sonar.duplications.token.TokenChunker} class.
+ */
+package org.sonar.duplications.token;
+
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/utils/FastStringComparator.java b/sonar-duplications/src/main/java/org/sonar/duplications/utils/FastStringComparator.java
new file mode 100644
index 00000000000..319f26faf3e
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/utils/FastStringComparator.java
@@ -0,0 +1,57 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.utils;
+
+import java.util.Comparator;
+
+/**
+ * More efficient (in terms of performance) implementation of a String comparator.
+ * Speed is gained by using hash code as a primary comparison attribute, which is cached for String.
+ * Be aware that this ordering is not lexicographic, however stable.
+ */
+public final class FastStringComparator implements Comparator<String> {
+
+ public static final FastStringComparator INSTANCE = new FastStringComparator();
+
+ /**
+ * Compares two strings (not lexicographically).
+ */
+ public int compare(String s1, String s2) {
+ if (s1 == s2) { // NOSONAR false-positive: Compare Objects With Equals
+ return 0;
+ }
+ int h1 = s1.hashCode();
+ int h2 = s2.hashCode();
+ if (h1 < h2) {
+ return -1;
+ } else if (h1 > h2) {
+ return 1;
+ } else {
+ return s1.compareTo(s2);
+ }
+ }
+
+ /**
+ * Enforce use of a singleton instance.
+ */
+ private FastStringComparator() {
+ }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/utils/SortedListsUtils.java b/sonar-duplications/src/main/java/org/sonar/duplications/utils/SortedListsUtils.java
new file mode 100644
index 00000000000..39b482e61ae
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/utils/SortedListsUtils.java
@@ -0,0 +1,62 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2011 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
+ */
+package org.sonar.duplications.utils;
+
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Provides utility methods for sorted lists.
+ */
+public final class SortedListsUtils {
+
+ /**
+ * Returns true if all elements from second list also presented in first list.
+ * Both lists must be sorted.
+ * And both must implement {@link java.util.RandomAccess}, otherwise this method is inefficient in terms of performance.
+ * Running time - O(|container| + |list|).
+ */
+ public static <T> boolean contains(List<T> container, List<T> list, Comparator<T> comparator) {
+ int j = 0;
+ for (int i = 0; i < list.size(); i++) {
+ T e1 = list.get(i);
+ boolean found = false;
+ for (; j < container.size(); j++) {
+ T e2 = container.get(j);
+ int c = comparator.compare(e1, e2);
+ if (c == 0) {
+ found = true;
+ break;
+ } else if (c < 0) {
+ // e1 is less than e2 - stop search, because all next elements e2 would be greater than e1
+ return false;
+ }
+ }
+ if (!found) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private SortedListsUtils() {
+ }
+
+}