diff options
author | Evgeny Mandrikov <mandrikov@gmail.com> | 2011-09-07 15:20:43 +0400 |
---|---|---|
committer | Evgeny Mandrikov <mandrikov@gmail.com> | 2011-09-07 15:41:46 +0400 |
commit | 36bcc8dbc17d720a9d6bb81a36dacd0966341f32 (patch) | |
tree | 4e7ab0e15c840d6d0e31696d8420891217a35826 /sonar-duplications/src/main/java/org/sonar | |
parent | 081adad23e053449b2f187c2e74d481bb1b25feb (diff) | |
download | sonarqube-36bcc8dbc17d720a9d6bb81a36dacd0966341f32.tar.gz sonarqube-36bcc8dbc17d720a9d6bb81a36dacd0966341f32.zip |
SONAR-1091 Update library for detection of duplicates
Diffstat (limited to 'sonar-duplications/src/main/java/org/sonar')
38 files changed, 3177 insertions, 0 deletions
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/DuplicationsException.java b/sonar-duplications/src/main/java/org/sonar/duplications/DuplicationsException.java new file mode 100644 index 00000000000..27e7dcdc223 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/DuplicationsException.java @@ -0,0 +1,31 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications; + +public class DuplicationsException extends RuntimeException { + + public DuplicationsException(String message) { + super(message); + } + + public DuplicationsException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java new file mode 100644 index 00000000000..aac5a806ee6 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java @@ -0,0 +1,107 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.block; + +/** + * Represents part of source code between two lines. + * If two blocks have the same {@link #getBlockHash() hash}, then we assume that there is a duplication in a code, which they represent. + */ +public final class Block { + + private final String resourceId; + private final ByteArray blockHash; + private final int indexInFile; + private final int firstLineNumber; + private final int lastLineNumber; + + /** + * Cache for hash code. + */ + private int hash; + + public Block(String resourceId, ByteArray blockHash, int indexInFile, int firstLineNumber, int lastLineNumber) { + this.resourceId = resourceId; + this.blockHash = blockHash; + this.indexInFile = indexInFile; + this.firstLineNumber = firstLineNumber; + this.lastLineNumber = lastLineNumber; + } + + public Block(int indexInFile, int firstLineNumber, int lastLineNumber, String resourceId, String hash) { + this(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber); + } + + public String getHashHex() { + return getBlockHash().toString(); + } + + public String getResourceId() { + return resourceId; + } + + public ByteArray getBlockHash() { + return blockHash; + } + + public int getIndexInFile() { + return indexInFile; + } + + public int getFirstLineNumber() { + return firstLineNumber; + } + + public int getLastLineNumber() { + return lastLineNumber; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof Block)) { + return false; + } + Block other = (Block) obj; + return resourceId.equals(other.resourceId) + && blockHash.equals(other.blockHash) + && indexInFile == other.indexInFile + && firstLineNumber == other.firstLineNumber + && lastLineNumber == other.lastLineNumber; + } + + @Override + public int hashCode() { + int h = hash; + if (h == 0) { + h = resourceId.hashCode(); + h = 31 * h + blockHash.hashCode(); + h = 31 * h + indexInFile; + h = 31 * h + firstLineNumber; + h = 31 * h + lastLineNumber; + hash = h; + } + return h; + } + + @Override + public String toString() { + return "'" + resourceId + "'[" + indexInFile + "|" + firstLineNumber + "-" + lastLineNumber + "]:" + blockHash; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java new file mode 100644 index 00000000000..2232a3f9a71 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java @@ -0,0 +1,87 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.block; + +import java.util.Collections; +import java.util.List; + +import org.sonar.duplications.statement.Statement; + +import com.google.common.collect.Lists; + +/** + * Creates blocks from statements, each block will contain specified number of statements (<code>blockSize</code>) and 64-bits (8-bytes) hash value. + * Hash value computed using + * <a href="http://en.wikipedia.org/wiki/Rolling_hash#Rabin-Karp_rolling_hash">Rabin-Karp rolling hash</a> : + * <blockquote><pre> + * s[0]*31^(blockSize-1) + s[1]*31^(blockSize-2) + ... + s[blockSize-1] + * </pre></blockquote> + * using <code>long</code> arithmetic, where <code>s[i]</code> + * is the hash code of <code>String</code> (which is cached) for statement with number i. + * Thus running time - O(N), where N - number of statements. + * Implementation fully thread-safe. + */ +public class BlockChunker { + + private static final long PRIME_BASE = 31; + + private final int blockSize; + private final long power; + + public BlockChunker(int blockSize) { + this.blockSize = blockSize; + + long pow = 1; + for (int i = 0; i < blockSize - 1; i++) { + pow = pow * PRIME_BASE; + } + this.power = pow; + } + + public List<Block> chunk(String resourceId, List<Statement> statements) { + if (statements.size() < blockSize) { + return Collections.emptyList(); + } + Statement[] statementsArr = statements.toArray(new Statement[statements.size()]); + List<Block> blocks = Lists.newArrayListWithCapacity(statementsArr.length - blockSize + 1); + long hash = 0; + int first = 0; + int last = 0; + for (; last < blockSize - 1; last++) { + hash = hash * PRIME_BASE + statementsArr[last].getValue().hashCode(); + } + for (; last < statementsArr.length; last++, first++) { + Statement firstStatement = statementsArr[first]; + Statement lastStatement = statementsArr[last]; + // add last statement to hash + hash = hash * PRIME_BASE + lastStatement.getValue().hashCode(); + // create block + blocks.add(new Block(resourceId, new ByteArray(hash), first, firstStatement.getStartLine(), lastStatement.getEndLine())); + // remove first statement from hash + hash -= power * firstStatement.getValue().hashCode(); + } + return blocks; + } + + public int getBlockSize() { + return blockSize; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/ByteArray.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/ByteArray.java new file mode 100644 index 00000000000..d3d97a19631 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/ByteArray.java @@ -0,0 +1,131 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.block; + +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.util.Arrays; + +/** + * Represents hash for {@link Block}. Immutable. + * + * TODO Godin: would be better to rename to BlockHash, + * also maybe we can incorporate it into Block to reduce memory footprint during detection of duplicates + */ +public final class ByteArray { + + private final byte[] bytes; + + /** + * Cache for hash code. + */ + private int hash; + + public ByteArray(String hexString) { + int len = hexString.length(); + this.bytes = new byte[len / 2]; + for (int i = 0; i < len; i += 2) { + bytes[i / 2] = (byte) ((Character.digit(hexString.charAt(i), 16) << 4) + Character.digit(hexString.charAt(i + 1), 16)); + } + } + + public ByteArray(byte[] bytes) { + this.bytes = new byte[bytes.length]; + System.arraycopy(bytes, 0, this.bytes, 0, bytes.length); + } + + public ByteArray(long value) { + this.bytes = new byte[] { + (byte) (value >>> 56), + (byte) (value >>> 48), + (byte) (value >>> 40), + (byte) (value >>> 32), + (byte) (value >>> 24), + (byte) (value >>> 16), + (byte) (value >>> 8), + (byte) value }; + } + + public ByteArray(int value) { + this.bytes = new byte[] { + (byte) (value >>> 24), + (byte) (value >>> 16), + (byte) (value >>> 8), + (byte) value }; + } + + public ByteArray(int[] intArray) { + ByteBuffer bb = ByteBuffer.allocate(intArray.length * 4); + for (int i : intArray) { + bb.putInt(i); + } + this.bytes = bb.array(); + } + + public int[] toIntArray() { + int size = (bytes.length / 4) + (bytes.length % 4 == 0 ? 0 : 1); // Pad the size to multiple of 4 + ByteBuffer bb = ByteBuffer.allocate(size * 4); + bb.put(bytes); + bb.rewind(); + IntBuffer ib = bb.asIntBuffer(); + int[] result = new int[size]; + ib.get(result); + return result; + } + + private static final String HEXES = "0123456789abcdef"; + + public String toHexString() { + StringBuilder hex = new StringBuilder(2 * bytes.length); + for (byte b : bytes) { + hex.append(HEXES.charAt((b & 0xF0) >> 4)).append(HEXES.charAt((b & 0x0F))); + } + return hex.toString(); + } + + @Override + public String toString() { + return toHexString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ByteArray other = (ByteArray) o; + return Arrays.equals(bytes, other.bytes); + } + + @Override + public int hashCode() { + int h = hash; + int len = bytes.length; + if (h == 0 && len > 0) { + h = Arrays.hashCode(bytes); + hash = h; + } + return h; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/BlocksGroup.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/BlocksGroup.java new file mode 100644 index 00000000000..55dc41e2298 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/BlocksGroup.java @@ -0,0 +1,212 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.detector.original; + +import java.util.Comparator; +import java.util.List; + +import org.sonar.duplications.block.Block; +import org.sonar.duplications.utils.FastStringComparator; + +import com.google.common.collect.Lists; + +/** + * Set of {@link Block}s, which internally stored as a sorted list. + */ +final class BlocksGroup { + + /** + * Factory method. + * + * @return new empty group + */ + public static BlocksGroup empty() { + return new BlocksGroup(); + } + + protected final List<Block> blocks; + + private BlocksGroup() { + this.blocks = Lists.newArrayList(); + } + + public int size() { + return blocks.size(); + } + + /** + * @return true, if this group subsumed by specified group + * @see #subsumedBy(BlocksGroup, BlocksGroup, int) + */ + public boolean subsumedBy(BlocksGroup other, int indexCorrection) { + return subsumedBy(this, other, indexCorrection); + } + + /** + * @return intersection of this group with specified + * @see #intersect(BlocksGroup, BlocksGroup) + */ + public BlocksGroup intersect(BlocksGroup other) { + return intersect(this, other); + } + + public List<Block[]> pairs(BlocksGroup other, int len) { + return pairs(this, other, len); + } + + /** + * First block from this group with specified resource id. + */ + public Block first(String resourceId) { + for (Block block : blocks) { + if (resourceId.equals(block.getResourceId())) { + return block; + } + } + return null; + } + + @Override + public String toString() { + return blocks.toString(); + } + + /** + * Intersection of two groups is a group, which contains blocks from second group that have corresponding block from first group + * with same resource id and with corrected index. + */ + private static BlocksGroup intersect(BlocksGroup group1, BlocksGroup group2) { + BlocksGroup intersection = new BlocksGroup(); + List<Block> list1 = group1.blocks; + List<Block> list2 = group2.blocks; + int i = 0; + int j = 0; + while (i < list1.size() && j < list2.size()) { + Block block1 = list1.get(i); + Block block2 = list2.get(j); + int c = RESOURCE_ID_COMPARATOR.compare(block1.getResourceId(), block2.getResourceId()); + if (c > 0) { + j++; + continue; + } + if (c < 0) { + i++; + continue; + } + if (c == 0) { + c = block1.getIndexInFile() + 1 - block2.getIndexInFile(); + } + if (c == 0) { // list1[i] == list2[j] + i++; + j++; + intersection.blocks.add(block2); + } + if (c > 0) { // list1[i] > list2[j] + j++; + } + if (c < 0) { // list1[i] < list2[j] + i++; + } + } + return intersection; + } + + /** + * One group is subsumed by another group, when each block from first group has corresponding block from second group + * with same resource id and with corrected index. + */ + private static boolean subsumedBy(BlocksGroup group1, BlocksGroup group2, int indexCorrection) { + List<Block> list1 = group1.blocks; + List<Block> list2 = group2.blocks; + int i = 0; + int j = 0; + while (i < list1.size() && j < list2.size()) { + Block block1 = list1.get(i); + Block block2 = list2.get(j); + int c = RESOURCE_ID_COMPARATOR.compare(block1.getResourceId(), block2.getResourceId()); + if (c != 0) { + j++; + continue; + } + if (c == 0) { + c = block1.getIndexInFile() - indexCorrection - block2.getIndexInFile(); + } + if (c < 0) { // list1[i] < list2[j] + break; + } + if (c != 0) { // list1[i] != list2[j] + j++; + } + if (c == 0) { // list1[i] == list2[j] + i++; + j++; + } + } + return i == list1.size(); + } + + private static List<Block[]> pairs(BlocksGroup beginGroup, BlocksGroup endGroup, int len) { + List<Block[]> result = Lists.newArrayList(); + List<Block> beginBlocks = beginGroup.blocks; + List<Block> endBlocks = endGroup.blocks; + int i = 0; + int j = 0; + while (i < beginBlocks.size() && j < endBlocks.size()) { + Block beginBlock = beginBlocks.get(i); + Block endBlock = endBlocks.get(j); + int c = RESOURCE_ID_COMPARATOR.compare(beginBlock.getResourceId(), endBlock.getResourceId()); + if (c == 0) { + c = beginBlock.getIndexInFile() + len - 1 - endBlock.getIndexInFile(); + } + if (c == 0) { + result.add(new Block[] { beginBlock, endBlock }); + i++; + j++; + } + if (c > 0) { + j++; + } + if (c < 0) { + i++; + } + } + return result; + } + + private static final Comparator<String> RESOURCE_ID_COMPARATOR = FastStringComparator.INSTANCE; + + /** + * Compares {@link Block}s first using {@link Block#getResourceId() resource id} and then using {@link Block#getIndexInFile() index in file}. + */ + public static class BlockComparator implements Comparator<Block> { + + public static final BlockComparator INSTANCE = new BlockComparator(); + + public int compare(Block b1, Block b2) { + int c = RESOURCE_ID_COMPARATOR.compare(b1.getResourceId(), b2.getResourceId()); + if (c == 0) { + return b1.getIndexInFile() - b2.getIndexInFile(); + } + return c; + } + + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java new file mode 100644 index 00000000000..3574b2fed2e --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/Filter.java @@ -0,0 +1,154 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.detector.original; + +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; + +import org.sonar.duplications.index.CloneGroup; +import org.sonar.duplications.index.ClonePart; +import org.sonar.duplications.utils.FastStringComparator; +import org.sonar.duplications.utils.SortedListsUtils; + +import com.google.common.collect.Lists; + +/** + * Performs incremental and brute force algorithm in order to filter clones, which are fully covered by other clones. + * All clones for filtering must be of the same origin - there is no sanity check on this. + * In a worst case it performs O(N*N) comparisons. + * <p> + * Godin: This implementation was chosen because it simple. + * And I wasn't able to find big difference in performance with an interval tree, which we had for the moment of testing. + * Whereas in fact I expected that interval tree would be better for this task. + * Moreover with interval tree we also can use incremental approach, + * but we did not had an implementation with remove operation for the moment of testing. + * </p> + */ +final class Filter { + + /** + * Note that LinkedList should provide better performance here, because of use of operation remove. + * + * @see #add(CloneGroup) + */ + private final List<CloneGroup> filtered = Lists.newLinkedList(); + + /** + * @return current results of filtering + */ + public List<CloneGroup> getResult() { + return filtered; + } + + /** + * Running time - O(N*2*C), where N - number of clones, which was found earlier and C - time of {@link #containsIn(CloneGroup, CloneGroup)}. + */ + public void add(CloneGroup current) { + Iterator<CloneGroup> i = filtered.iterator(); + while (i.hasNext()) { + CloneGroup earlier = i.next(); + // Note that following two conditions cannot be true together - proof by contradiction: + // let C be the current clone and A and B were found earlier + // then since relation is transitive - (A in C) and (C in B) => (A in B) + // so A should be filtered earlier + if (Filter.containsIn(current, earlier)) { + // current clone fully covered by clone, which was found earlier + return; + } + if (Filter.containsIn(earlier, current)) { + // current clone fully covers clone, which was found earlier + i.remove(); + } + } + filtered.add(current); + } + + /** + * Checks that second clone contains first one. + * <p> + * Clone A is contained in another clone B, if every part pA from A has part pB in B, + * which satisfy the conditions: + * <pre> + * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd) + * </pre> + * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A, + * which satisfy the condition: + * <pre> + * pB.resourceId == pA.resourceId + * </pre> + * So this relation is: + * <ul> + * <li>reflexive - A in A</li> + * <li>transitive - (A in B) and (B in C) => (A in C)</li> + * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li> + * </ul> + * </p> + * <p> + * <strong>Important: this method relies on fact that all parts were already sorted by resourceId and unitStart by using + * {@link BlocksGroup.BlockComparator}, which uses {@link FastStringComparator} for comparison by resourceId.</strong> + * </p> + * <p> + * Running time - O(|A|+|B|). + * </p> + */ + static boolean containsIn(CloneGroup first, CloneGroup second) { + if (first.getCloneUnitLength() > second.getCloneUnitLength()) { + return false; + } + List<ClonePart> firstParts = first.getCloneParts(); + List<ClonePart> secondParts = second.getCloneParts(); + return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(first.getCloneUnitLength(), second.getCloneUnitLength())) + && SortedListsUtils.contains(firstParts, secondParts, RESOURCE_ID_COMPARATOR); + } + + private static final Comparator<ClonePart> RESOURCE_ID_COMPARATOR = new Comparator<ClonePart>() { + public int compare(ClonePart o1, ClonePart o2) { + return FastStringComparator.INSTANCE.compare(o1.getResourceId(), o2.getResourceId()); + } + }; + + private static final class ContainsInComparator implements Comparator<ClonePart> { + private final int l1, l2; + + public ContainsInComparator(int l1, int l2) { + this.l1 = l1; + this.l2 = l2; + } + + public int compare(ClonePart o1, ClonePart o2) { + int c = FastStringComparator.INSTANCE.compare(o1.getResourceId(), o2.getResourceId()); + if (c == 0) { + if (o2.getUnitStart() <= o1.getUnitStart()) { + if (o1.getUnitStart() + l1 <= o2.getUnitStart() + l2) { + return 0; // match found - stop search + } else { + return 1; // continue search + } + } else { + return -1; // o1 < o2 by unitStart - stop search + } + } else { + return c; + } + } + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/OriginalCloneDetectionAlgorithm.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/OriginalCloneDetectionAlgorithm.java new file mode 100644 index 00000000000..c6eb043d2b1 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/original/OriginalCloneDetectionAlgorithm.java @@ -0,0 +1,226 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.detector.original; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.sonar.duplications.block.Block; +import org.sonar.duplications.block.ByteArray; +import org.sonar.duplications.index.CloneGroup; +import org.sonar.duplications.index.CloneIndex; +import org.sonar.duplications.index.ClonePart; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +/** + * Implementation of algorithm described in paper + * <a href="http://www4.in.tum.de/~juergens/publications/icsm2010_crc.pdf">Index-Based Code Clone Detection: Incremental, Distributed, Scalable</a> + * by Benjamin Hummel, Elmar Juergens, Michael Conradt and Lars Heinemann. + */ +public final class OriginalCloneDetectionAlgorithm { + + /** + * Performs detection and returns list of clone groups between file (which represented as a collection of blocks) and index. + * Note that this method ignores blocks for this file, that will be retrieved from index. + */ + public static List<CloneGroup> detect(CloneIndex cloneIndex, Collection<Block> fileBlocks) { + if (fileBlocks.isEmpty()) { + return Collections.EMPTY_LIST; + } + OriginalCloneDetectionAlgorithm reporter = new OriginalCloneDetectionAlgorithm(cloneIndex); + reporter.findClones(fileBlocks); + return reporter.filter.getResult(); + } + + private final CloneIndex cloneIndex; + + private final Filter filter = new Filter(); + + private String originResourceId; + + private OriginalCloneDetectionAlgorithm(CloneIndex cloneIndex) { + this.cloneIndex = cloneIndex; + } + + private BlocksGroup[] createGroups(Collection<Block> fileBlocks) { + // 2: let f be the list of tuples corresponding to filename sorted by statement index + // either read from the index or calculated on the fly + int size = fileBlocks.size(); + + // Godin: create one group per unique hash + Map<ByteArray, BlocksGroup> groupsByHash = Maps.newHashMap(); // TODO Godin: can we create map with expected size? + for (Block fileBlock : fileBlocks) { + ByteArray hash = fileBlock.getBlockHash(); + BlocksGroup sameHash = groupsByHash.get(hash); + if (sameHash == null) { + sameHash = BlocksGroup.empty(); + groupsByHash.put(hash, sameHash); + } + sameHash.blocks.add(fileBlock); + } + + // Godin: retrieve blocks from index + for (Map.Entry<ByteArray, BlocksGroup> entry : groupsByHash.entrySet()) { + ByteArray hash = entry.getKey(); + BlocksGroup group = entry.getValue(); + for (Block blockFromIndex : cloneIndex.getBySequenceHash(hash)) { + // Godin: skip blocks for this file if they come from index + if (!originResourceId.equals(blockFromIndex.getResourceId())) { + group.blocks.add(blockFromIndex); + } + } + Collections.sort(group.blocks, BlocksGroup.BlockComparator.INSTANCE); + } + + // 3: let c be a list with c(0) = empty + BlocksGroup[] sameHashBlocksGroups = new BlocksGroup[size + 2]; + sameHashBlocksGroups[0] = BlocksGroup.empty(); + // 4: for i := 1 to length(f) do + for (Block fileBlock : fileBlocks) { + ByteArray hash = fileBlock.getBlockHash(); + int i = fileBlock.getIndexInFile() + 1; + // 5: retrieve tuples with same sequence hash as f(i) + // 6: store this set as c(i) + sameHashBlocksGroups[i] = groupsByHash.get(hash); + } + + // Godin: allows to report clones at the end of file, because condition at line 13 would be evaluated as true + sameHashBlocksGroups[size + 1] = BlocksGroup.empty(); + + return sameHashBlocksGroups; + } + + private void findClones(Collection<Block> fileBlocks) { + originResourceId = fileBlocks.iterator().next().getResourceId(); + + BlocksGroup[] sameHashBlocksGroups = createGroups(fileBlocks); + + // 7: for i := 1 to length(c) do + for (int i = 1; i < sameHashBlocksGroups.length; i++) { + // In the main loop (starting from Line 7), we first check + // whether any new clones might start at this position. If there + // is only a single tuple with this hash (which has to belong + // to the inspected file at the current location) we skip this loop + // iteration. The same holds if all tuples at position i have already + // been present at position i − 1, as in this case any clone group + // found at position i would be included in a clone group starting + // at position i − 1. + + // Although we use the subset operator in the + // algorithm description, this is not really a subset operation, + // as of course the statement index of the tuples in c(i) will be + // increased by 1 compared to the corresponding ones in c(i − 1) + // and the hash and info fields will differ. + + // 8: if |c(i)| < 2 or c(i) subsumed by c(i - 1) then + if (sameHashBlocksGroups[i].size() < 2 || sameHashBlocksGroups[i].subsumedBy(sameHashBlocksGroups[i - 1], 1)) { + // 9: continue with next loop iteration + continue; + } + + // The set a introduced in Line 10 is called the active set and + // contains all tuples corresponding to clones which have not yet + // been reported. At each iteration of the inner loop the set a + // is reduced to tuples which are also present in c(j); again the + // intersection operator has to account for the increased statement + // index and different hash and info fields. The new value is + // stored in a0. Clones are only reported, if tuples are lost in + // Line 12, as otherwise all current clones could be prolonged + // by one statement. Clone reporting matches tuples that, after + // correction of the statement index, appear in both c(i) and a; + // each matched pair corresponds to a single clone. Its location + // can be extracted from the filename and info fields. + + // 10: let a := c(i) + BlocksGroup currentBlocksGroup = sameHashBlocksGroups[i]; + // 11: for j := i + 1 to length(c) do + for (int j = i + 1; j < sameHashBlocksGroups.length; j++) { + // 12: let a0 := a intersect c(j) + BlocksGroup intersectedBlocksGroup = currentBlocksGroup.intersect(sameHashBlocksGroups[j]); + + // 13: if |a0| < |a| then + if (intersectedBlocksGroup.size() < currentBlocksGroup.size()) { + // 14: report clones from c(i) to a (see text) + + // One problem of this algorithm is that clone classes with + // multiple instances in the same file are encountered and + // reported multiple times. Furthermore, when calculating the clone + // groups for all files in a system, clone groups will be reported + // more than once as well. Both cases can be avoided, by + // checking whether the first element of a0 (with respect to a + // fixed order) is equal to f(j) and only report in this case. + + Block first = currentBlocksGroup.first(originResourceId); + if (first.getIndexInFile() == j - 2) { + // Godin: We report clones, which start in i-1 and end in j-2, so length is j-2-(i-1)+1=j-i + reportClones(sameHashBlocksGroups[i], currentBlocksGroup, j - i); + } + } + // 15: a := a0 + currentBlocksGroup = intersectedBlocksGroup; + + // Line 16 early exits the inner loop if either no more clones are starting + // from position i (i.e., a is too small), or if all tuples from a + // have already been in c(i − 1), corrected for statement index. + // In this case they have already been reported in the previous + // iteration of the outer loop. + + // IMPORTANT Godin: note that difference in indexes between "a" and "c(i-1)" greater than one, + // so method subsumedBy should take this into account + + // 16: if |a| < 2 or a subsumed by c(i-1) then + if (currentBlocksGroup.size() < 2 || currentBlocksGroup.subsumedBy(sameHashBlocksGroups[i - 1], j - i + 1)) { + // 17: break inner loop + break; + } + } + } + } + + private void reportClones(BlocksGroup beginGroup, BlocksGroup endGroup, int cloneLength) { + List<Block[]> pairs = beginGroup.pairs(endGroup, cloneLength); + + ClonePart origin = null; + List<ClonePart> parts = Lists.newArrayList(); + + for (int i = 0; i < pairs.size(); i++) { + Block[] pair = pairs.get(i); + ClonePart part = new ClonePart(pair[0].getResourceId(), pair[0].getIndexInFile(), pair[0].getFirstLineNumber(), pair[1].getLastLineNumber()); + parts.add(part); + + if (originResourceId.equals(part.getResourceId())) { + if (origin == null) { + origin = part; + } else { + if (part.getUnitStart() < origin.getUnitStart()) { + origin = part; + } + } + } + } + + filter.add(new CloneGroup(cloneLength, origin, parts)); + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/AbstractCloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/AbstractCloneIndex.java new file mode 100644 index 00000000000..88ec3bad6d2 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/AbstractCloneIndex.java @@ -0,0 +1,24 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.index; + +public abstract class AbstractCloneIndex implements CloneIndex { + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java new file mode 100644 index 00000000000..f6552237322 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java @@ -0,0 +1,107 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.index; + +import java.util.List; + +import com.google.common.collect.ImmutableList; + +/** + * Groups a set of related {@link ClonePart}s. + */ +public class CloneGroup { + + private final ClonePart originPart; + private final int cloneLength; + private final List<ClonePart> parts; + + /** + * Cache for hash code. + */ + private int hash; + + public CloneGroup(int cloneLength, ClonePart origin, List<ClonePart> parts) { + this.cloneLength = cloneLength; + this.originPart = origin; + this.parts = ImmutableList.copyOf(parts); + } + + public ClonePart getOriginPart() { + return originPart; + } + + /** + * @return clone length in units (not in lines) + */ + public int getCloneUnitLength() { + return cloneLength; + } + + public List<ClonePart> getCloneParts() { + return parts; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + for (ClonePart part : parts) { + builder.append(part).append(" - "); + } + builder.append(cloneLength); + return builder.toString(); + } + + /** + * Two groups are equal, if they have same length, same origins and contain same parts in same order. + */ + @Override + public boolean equals(Object object) { + if (!(object instanceof CloneGroup)) { + return false; + } + CloneGroup another = (CloneGroup) object; + if (another.cloneLength != cloneLength || parts.size() != another.parts.size()) { + return false; + } + if (!originPart.equals(another.originPart)) { + return false; + } + boolean result = true; + for (int i = 0; i < parts.size(); i++) { + result &= another.parts.get(i).equals(parts.get(i)); + } + return result; + } + + @Override + public int hashCode() { + int h = hash; + if (h == 0 && cloneLength != 0) { + for (ClonePart part : parts) { + h = 31 * h + part.hashCode(); + } + h = 31 * h + originPart.hashCode(); + h = 31 * h + cloneLength; + hash = h; + } + return h; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneIndex.java new file mode 100644 index 00000000000..b06f6775c42 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneIndex.java @@ -0,0 +1,48 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.index; + +import java.util.Collection; + +import org.sonar.duplications.block.Block; +import org.sonar.duplications.block.ByteArray; + +public interface CloneIndex { + + /** + * Performs search of blocks for specified resource. + * + * @return collection of blocks from index for specified resource and empty collection if nothing found + */ + Collection<Block> getByResourceId(String resourceId); + + /** + * Performs search of blocks for specified hash value. + * + * @return collection of blocks from index with specified hash and empty collection if nothing found + */ + Collection<Block> getBySequenceHash(ByteArray hash); + + /** + * Adds specified block into index. + */ + void insert(Block block); + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java new file mode 100644 index 00000000000..2f31bf6f93a --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java @@ -0,0 +1,91 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.index; + +public class ClonePart { + + private final String resourceId; + private final int unitStart; + private final int lineStart; + private final int lineEnd; + + /** + * Cache for hash code. + */ + private int hash; + + public ClonePart(String resourceId, int unitStart, int lineStart, int lineEnd) { + this.resourceId = resourceId; + this.unitStart = unitStart; + this.lineStart = lineStart; + this.lineEnd = lineEnd; + } + + public String getResourceId() { + return resourceId; + } + + public int getUnitStart() { + return unitStart; + } + + public int getLineStart() { + return lineStart; + } + + public int getLineEnd() { + return lineEnd; + } + + public int getLines() { + return lineEnd - lineStart + 1; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof ClonePart) { + ClonePart another = (ClonePart) obj; + return another.resourceId.equals(resourceId) + && another.lineStart == lineStart + && another.lineEnd == lineEnd + && another.unitStart == unitStart; + } + return false; + } + + @Override + public int hashCode() { + int h = hash; + if (h == 0) { + h = resourceId.hashCode(); + h = 31 * h + lineStart; + h = 31 * h + lineEnd; + h = 31 * h + unitStart; + hash = h; + } + return h; + } + + @Override + public String toString() { + return "'" + resourceId + "':[" + unitStart + "|" + lineStart + "-" + lineEnd + "]"; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/DataUtils.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/DataUtils.java new file mode 100644 index 00000000000..65eb1dbb692 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/DataUtils.java @@ -0,0 +1,116 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.index; + +public final class DataUtils { + + public interface Sortable { + + /** + * @return the number of elements. + */ + int size(); + + /** + * Swaps elements in positions i and j. + */ + void swap(int i, int j); + + /** + * @return true if element in position i less than in position j. + */ + boolean isLess(int i, int j); + + } + + /** + * Value for search must be stored in position {@link Sortable#size() size}. + */ + public static int binarySearch(Sortable data) { + int value = data.size(); + int lower = 0; + int upper = data.size(); + while (lower < upper) { + int mid = (lower + upper) >> 1; + if (data.isLess(mid, value)) { + lower = mid + 1; + } else { + upper = mid; + } + } + return lower; + } + + public static void sort(Sortable data) { + quickSort(data, 0, data.size() - 1); + } + + private static void bubbleSort(Sortable data, int left, int right) { + for (int i = right; i > left; i--) { + for (int j = left; j < i; j++) { + if (data.isLess(j + 1, j)) { + data.swap(j, j + 1); + } + } + } + } + + private static int partition(Sortable data, int i, int j) { + // can be selected randomly + int pivot = i + (j - i) / 2; + while (i <= j) { + while (data.isLess(i, pivot)) { + i++; + } + while (data.isLess(pivot, j)) { + j--; + } + if (i <= j) { + data.swap(i, j); + if (i == pivot) { + pivot = j; + } else if (j == pivot) { + pivot = i; + } + i++; + j--; + } + } + return i; + } + + private static void quickSort(Sortable data, int low, int high) { + if (high - low < 5) { + bubbleSort(data, low, high); + return; + } + int i = partition(data, low, high); + if (low < i - 1) { + quickSort(data, low, i - 1); + } + if (i < high) { + quickSort(data, i, high); + } + } + + private DataUtils() { + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex.java new file mode 100644 index 00000000000..1fbce94e817 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex.java @@ -0,0 +1,84 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.index; + +import java.io.Serializable; +import java.util.Collection; +import java.util.Comparator; + +import org.sonar.duplications.block.Block; +import org.sonar.duplications.block.ByteArray; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.TreeMultimap; + +public class MemoryCloneIndex implements CloneIndex { + + private final TreeMultimap<String, Block> filenameIndex; + private final HashMultimap<ByteArray, Block> sequenceHashIndex; + + private static final class ValueComparator implements Comparator<Block>, Serializable { + + private static final long serialVersionUID = 6048010242032502222L; + + public int compare(Block o1, Block o2) { + if (o2.getResourceId().equals(o1.getResourceId())) { + return o1.getIndexInFile() - o2.getIndexInFile(); + } + return o1.getResourceId().compareTo(o2.getResourceId()); + } + } + + private static final class KeyComparator implements Comparator<String>, Serializable { + + private static final long serialVersionUID = 8705841881237170539L; + + public int compare(String o1, String o2) { + return o1.compareTo(o2); + } + } + + public MemoryCloneIndex() { + filenameIndex = TreeMultimap.create(new KeyComparator(), new ValueComparator()); + sequenceHashIndex = HashMultimap.create(); + } + + public Collection<String> getAllUniqueResourceId() { + return filenameIndex.keySet(); + } + + public boolean containsResourceId(String resourceId) { + return filenameIndex.containsKey(resourceId); + } + + public Collection<Block> getByResourceId(String fileName) { + return filenameIndex.get(fileName); + } + + public Collection<Block> getBySequenceHash(ByteArray sequenceHash) { + return sequenceHashIndex.get(sequenceHash); + } + + public void insert(Block tuple) { + filenameIndex.put(tuple.getResourceId(), tuple); + sequenceHashIndex.put(tuple.getBlockHash(), tuple); + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex2.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex2.java new file mode 100644 index 00000000000..ae4e9c35da4 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/MemoryCloneIndex2.java @@ -0,0 +1,65 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.index; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.sonar.duplications.block.Block; +import org.sonar.duplications.block.ByteArray; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +public class MemoryCloneIndex2 extends AbstractCloneIndex { + + private Map<String, List<Block>> byResource = Maps.newHashMap(); + private Map<ByteArray, List<Block>> byHash = Maps.newHashMap(); + + public Collection<Block> getByResourceId(String resourceId) { + return get(byResource, resourceId); + } + + public Collection<Block> getBySequenceHash(ByteArray sequenceHash) { + return get(byHash, sequenceHash); + } + + public void insert(Block block) { + put(byResource, block.getResourceId(), block); + put(byHash, block.getBlockHash(), block); + } + + private static <T> List<Block> get(Map<T, List<Block>> map, T key) { + List<Block> blocks = map.get(key); + return blocks != null ? blocks : Collections.EMPTY_LIST; + } + + private static <T> void put(Map<T, List<Block>> map, T key, Block value) { + List<Block> blocks = map.get(key); + if (blocks == null) { + blocks = Lists.newLinkedList(); + map.put(key, blocks); + } + blocks.add(value); + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java new file mode 100644 index 00000000000..c19bbfdd042 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java @@ -0,0 +1,280 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.index; + +import java.util.Collection; +import java.util.List; + +import org.sonar.duplications.block.Block; +import org.sonar.duplications.block.ByteArray; +import org.sonar.duplications.utils.FastStringComparator; + +import com.google.common.collect.Lists; + +/** + * Provides an index optimized by memory. + * <p> + * Each object in Java has an overhead - see + * <a href="http://devblog.streamy.com/2009/07/24/determine-size-of-java-object-class/">"HOWTO: Determine the size of a Java Object or Class"</a>. + * So to optimize memory consumption, we use flat arrays, however this increases time of queries. + * During usual detection of duplicates most time consuming method is a {@link #getByResourceId(String)}: + * around 50% of time spent in this class and number of invocations of this method is 1% of total invocations, + * however total time spent in this class less than 1 second for small projects and around 2 seconds for projects like JDK. + * </p> + * <p> + * Note that this implementation currently does not support deletion, however it's possible to implement. + * </p> + */ +public class PackedMemoryCloneIndex extends AbstractCloneIndex { + + private static final int DEFAULT_INITIAL_CAPACITY = 1024; + + private static final int BLOCK_INTS = 3; + + private final int hashInts; + + private final int blockInts; + + /** + * Indicates that index requires sorting to perform queries. + */ + private boolean sorted; + + /** + * Current number of blocks in index. + */ + private int size; + + private String[] resourceIds; + private int[] blockData; + + private int[] resourceIdsIndex; + + public PackedMemoryCloneIndex() { + this(8, DEFAULT_INITIAL_CAPACITY); + } + + /** + * @param hashBytes size of hash in bytes + * @param initialCapacity the initial capacity + */ + public PackedMemoryCloneIndex(int hashBytes, int initialCapacity) { + this.sorted = false; + this.hashInts = hashBytes / 4; + this.blockInts = hashInts + BLOCK_INTS; + this.size = 0; + this.resourceIds = new String[initialCapacity]; + this.blockData = new int[initialCapacity * blockInts]; + this.resourceIdsIndex = new int[initialCapacity]; + } + + /** + * {@inheritDoc} + * <p> + * <strong>Note that this implementation does not guarantee that blocks would be sorted by index.</strong> + * </p> + */ + public Collection<Block> getByResourceId(String resourceId) { + ensureSorted(); + + // prepare resourceId for binary search + resourceIds[size] = resourceId; + resourceIdsIndex[size] = size; + + int index = DataUtils.binarySearch(byResourceId); + + List<Block> result = Lists.newArrayList(); + int realIndex = resourceIdsIndex[index]; + while (index < size && FastStringComparator.INSTANCE.compare(resourceIds[realIndex], resourceId) == 0) { + // extract block (note that there is no need to extract resourceId) + int offset = realIndex * blockInts; + int[] hash = new int[hashInts]; + for (int j = 0; j < hashInts; j++) { + hash[j] = blockData[offset++]; + } + int indexInFile = blockData[offset++]; + int firstLineNumber = blockData[offset++]; + int lastLineNumber = blockData[offset]; + + result.add(new Block(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber)); + + index++; + realIndex = resourceIdsIndex[index]; + } + return result; + } + + /** + * {@inheritDoc} + */ + public Collection<Block> getBySequenceHash(ByteArray sequenceHash) { + ensureSorted(); + + // prepare hash for binary search + int[] hash = sequenceHash.toIntArray(); + if (hash.length != hashInts) { + throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length); + } + int offset = size * blockInts; + for (int i = 0; i < hashInts; i++) { + blockData[offset++] = hash[i]; + } + + int index = DataUtils.binarySearch(byBlockHash); + + List<Block> result = Lists.newArrayList(); + while (index < size && !isLessByHash(size, index)) { + // extract block (note that there is no need to extract hash) + String resourceId = resourceIds[index]; + offset = index * blockInts + hashInts; + int indexInFile = blockData[offset++]; + int firstLineNumber = blockData[offset++]; + int lastLineNumber = blockData[offset]; + + result.add(new Block(resourceId, sequenceHash, indexInFile, firstLineNumber, lastLineNumber)); + index++; + } + return result; + } + + /** + * {@inheritDoc} + * <p> + * <strong>Note that this implementation allows insertion of two blocks with same index for one resource.</strong> + * </p> + */ + public void insert(Block block) { + sorted = false; + ensureCapacity(); + + resourceIds[size] = block.getResourceId(); + + int[] hash = block.getBlockHash().toIntArray(); + if (hash.length != hashInts) { + throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length); + } + int offset = size * blockInts; + for (int i = 0; i < hashInts; i++) { + blockData[offset++] = hash[i]; + } + blockData[offset++] = block.getIndexInFile(); + blockData[offset++] = block.getFirstLineNumber(); + blockData[offset] = block.getLastLineNumber(); + + size++; + } + + /** + * Increases the capacity, if necessary. + */ + private void ensureCapacity() { + if (size < resourceIds.length) { + return; + } + int newCapacity = (resourceIds.length * 3) / 2 + 1; + // Increase size of resourceIds + String[] oldResourceIds = resourceIds; + resourceIds = new String[newCapacity]; + System.arraycopy(oldResourceIds, 0, resourceIds, 0, oldResourceIds.length); + // Increase size of blockData + int[] oldBlockData = blockData; + blockData = new int[newCapacity * blockInts]; + System.arraycopy(oldBlockData, 0, blockData, 0, oldBlockData.length); + // Increase size of byResourceIndices (no need to copy old, because would be restored in method ensureSorted) + resourceIdsIndex = new int[newCapacity]; + sorted = false; + } + + /** + * Performs sorting, if necessary. + */ + private void ensureSorted() { + if (sorted) { + return; + } + + ensureCapacity(); + + DataUtils.sort(byBlockHash); + for (int i = 0; i < size; i++) { + resourceIdsIndex[i] = i; + } + DataUtils.sort(byResourceId); + + sorted = true; + } + + private boolean isLessByHash(int i, int j) { + i *= blockInts; + j *= blockInts; + for (int k = 0; k < hashInts; k++, i++, j++) { + if (blockData[i] < blockData[j]) { + return true; + } + if (blockData[i] > blockData[j]) { + return false; + } + } + return false; + } + + private final DataUtils.Sortable byBlockHash = new DataUtils.Sortable() { + public void swap(int i, int j) { + String tmp = resourceIds[i]; + resourceIds[i] = resourceIds[j]; + resourceIds[j] = tmp; + + i *= blockInts; + j *= blockInts; + for (int k = 0; k < blockInts; k++, i++, j++) { + int x = blockData[i]; + blockData[i] = blockData[j]; + blockData[j] = x; + } + } + + public boolean isLess(int i, int j) { + return isLessByHash(i, j); + } + + public int size() { + return size; + } + }; + + private final DataUtils.Sortable byResourceId = new DataUtils.Sortable() { + public void swap(int i, int j) { + int tmp = resourceIdsIndex[i]; + resourceIdsIndex[i] = resourceIdsIndex[j]; + resourceIdsIndex[j] = tmp; + } + + public boolean isLess(int i, int j) { + String s1 = resourceIds[resourceIdsIndex[i]]; + String s2 = resourceIds[resourceIdsIndex[j]]; + return FastStringComparator.INSTANCE.compare(s1, s2) < 0; + } + + public int size() { + return size; + } + }; + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaStatementBuilder.java b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaStatementBuilder.java new file mode 100644 index 00000000000..99a74d26927 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaStatementBuilder.java @@ -0,0 +1,50 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.java; + +import org.sonar.duplications.statement.StatementChunker; + +import static org.sonar.duplications.statement.TokenMatcherFactory.*; + +public final class JavaStatementBuilder { + + private JavaStatementBuilder() { + } + + public static StatementChunker build() { + return StatementChunker.builder() + .ignore(from("import"), to(";")) + .ignore(from("package"), to(";")) + .ignore(token("}")) + .ignore(token("{")) + .statement(from("@"), anyToken(), opt(bridge("(", ")"))) + .statement(from("do")) + .statement(from("if"), bridge("(", ")")) + .statement(from("else"), token("if"), bridge("(", ")")) + .statement(from("else")) + .statement(from("for"), bridge("(", ")")) + .statement(from("while"), bridge("(", ")")) + .statement(from("case"), to(":")) + .statement(from("default"), to(":")) + .statement(to(";", "{", "}"), forgetLastToken()) + .build(); + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java new file mode 100644 index 00000000000..d9d922fcb27 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java @@ -0,0 +1,74 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.java; + +import org.sonar.duplications.token.TokenChunker; + +/** + * See <a href="http://java.sun.com/docs/books/jls/third_edition/html/lexical.html">The Java Language Specification, Third Edition: Lexical Structure</a> + * + * <p> + * We decided to use dollar sign as a prefix for normalization, even if it can be a part of an identifier, + * because according to Java Language Specification it supposed to be used only in mechanically generated source code. + * Thus probability to find it within a normal code should be low. + * </p> + */ +public final class JavaTokenProducer { + + private JavaTokenProducer() { + } + + private static final String NORMALIZED_CHARACTER_LITERAL = "$CHARS"; + private static final String NORMALIZED_NUMERIC_LITERAL = "$NUMBER"; + + private static final String EXP = "([Ee][+-]?+[0-9]++)"; + private static final String BINARY_EXP = "([Pp][+-]?+[0-9]++)"; + + private static final String FLOAT_SUFFIX = "[fFdD]"; + private static final String INT_SUFFIX = "[lL]"; + + public static TokenChunker build() { + return TokenChunker.builder() + // White Space + .ignore("\\s") + // Comments + .ignore("//[^\\n\\r]*+") + .ignore("/\\*[\\s\\S]*?\\*/") + // String Literals + .token("\"([^\"\\\\]*+(\\\\[\\s\\S])?+)*+\"", NORMALIZED_CHARACTER_LITERAL) + // Character Literals + .token("'([^'\\n\\\\]*+(\\\\.)?+)*+'", NORMALIZED_CHARACTER_LITERAL) + // Identifiers, Keywords, Boolean Literals, The Null Literal + .token("\\p{javaJavaIdentifierStart}++\\p{javaJavaIdentifierPart}*+") + // Floating-Point Literals + .token("[0-9]++\\.([0-9]++)?+" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) + .token("\\.[0-9]++" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) + .token("[0-9]++" + EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) + .token("0[xX][0-9a-fA-F]++\\.[0-9a-fA-F]*+" + BINARY_EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) + .token("0[xX][0-9a-fA-F]++" + BINARY_EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) + // Integer Literals + .token("0[xX][0-9a-fA-F]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) + .token("[0-9]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) + // Any other character + .token(".") + .build(); + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java new file mode 100644 index 00000000000..224e9a7c01d --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java @@ -0,0 +1,96 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement; + +import java.util.List; + +import org.sonar.duplications.token.Token; + +public class Statement { + + private final int startLine; + private final int endLine; + private final String value; + + /** + * Cache for hash code. + */ + private int hash; + + public Statement(int startLine, int endLine, String value) { + this.startLine = startLine; + this.endLine = endLine; + this.value = value; + } + + public Statement(List<Token> tokens) { + if (tokens == null || tokens.isEmpty()) { + throw new IllegalArgumentException("A statement can't be initialized with an empty list of tokens"); + } + StringBuilder sb = new StringBuilder(); + for (Token token : tokens) { + sb.append(token.getValue()); + } + this.value = sb.toString(); + this.startLine = tokens.get(0).getLine(); + this.endLine = tokens.get(tokens.size() - 1).getLine(); + } + + public int getStartLine() { + return startLine; + } + + public int getEndLine() { + return endLine; + } + + public String getValue() { + return value; + } + + @Override + public int hashCode() { + int h = hash; + if (h == 0) { + h = value.hashCode(); + h = 31 * h + startLine; + h = 31 * h + endLine; + hash = h; + } + return h; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof Statement)) { + return false; + } + Statement other = (Statement) obj; + return startLine == other.startLine + && endLine == other.endLine + && value.equals(other.value); + } + + @Override + public String toString() { + return "[" + getStartLine() + "-" + getEndLine() + "] [" + getValue() + "]"; + } + +}
\ No newline at end of file diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannel.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannel.java new file mode 100644 index 00000000000..fe12fdf7c76 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannel.java @@ -0,0 +1,67 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement; + +import java.util.ArrayList; +import java.util.List; + +import org.sonar.duplications.statement.matcher.TokenMatcher; +import org.sonar.duplications.token.Token; +import org.sonar.duplications.token.TokenQueue; + +public final class StatementChannel { + + private final TokenMatcher[] tokenMatchers; + private final boolean blackHole; + + public static StatementChannel create(TokenMatcher... tokenMatchers) { + return new StatementChannel(false, tokenMatchers); + } + + public static StatementChannel createBlackHole(TokenMatcher... tokenMatchers) { + return new StatementChannel(true, tokenMatchers); + } + + private StatementChannel(boolean blackHole, TokenMatcher... tokenMatchers) { + if (tokenMatchers == null || tokenMatchers.length == 0) { + throw new IllegalArgumentException(); + } + this.blackHole = blackHole; + this.tokenMatchers = tokenMatchers; + } + + public boolean consume(TokenQueue tokenQueue, List<Statement> output) { + List<Token> matchedTokenList = new ArrayList<Token>(); + for (TokenMatcher tokenMatcher : tokenMatchers) { + if (!tokenMatcher.matchToken(tokenQueue, matchedTokenList)) { + tokenQueue.pushForward(matchedTokenList); + return false; + } + } + + // all matchers were successful, so now build the statement + // matchedTokenList.size() check is for case with ForgiveLastTokenMatcher + if (!blackHole && !matchedTokenList.isEmpty()) { + output.add(new Statement(matchedTokenList)); + } + return true; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannelDisptacher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannelDisptacher.java new file mode 100644 index 00000000000..c6020857636 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChannelDisptacher.java @@ -0,0 +1,54 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ + +package org.sonar.duplications.statement; + +import java.util.List; + +import org.sonar.duplications.token.Token; +import org.sonar.duplications.token.TokenQueue; + +public class StatementChannelDisptacher { + + private final StatementChannel[] channels; + + public StatementChannelDisptacher(List<StatementChannel> channels) { + this.channels = channels.toArray(new StatementChannel[channels.size()]); + } + + public boolean consume(TokenQueue tokenQueue, List<Statement> statements) { + Token nextToken = tokenQueue.peek(); + while (nextToken != null) { + boolean channelConsumed = false; + for (StatementChannel channel : channels) { + if (channel.consume(tokenQueue, statements)) { + channelConsumed = true; + break; + } + } + if (!channelConsumed) { + throw new IllegalStateException("None of the statement channel has been able to consume token: " + nextToken); + } + nextToken = tokenQueue.peek(); + } + return true; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChunker.java new file mode 100644 index 00000000000..aeced1283b1 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/StatementChunker.java @@ -0,0 +1,95 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement; + +import java.util.ArrayList; +import java.util.List; + +import org.sonar.duplications.DuplicationsException; +import org.sonar.duplications.statement.matcher.TokenMatcher; +import org.sonar.duplications.token.TokenQueue; + +public final class StatementChunker { + + private final StatementChannelDisptacher channelDispatcher; + + public static Builder builder() { + return new Builder(); + } + + private StatementChunker(Builder builder) { + this.channelDispatcher = builder.getChannelDispatcher(); + } + + public List<Statement> chunk(TokenQueue tokenQueue) { + if (tokenQueue == null) { + throw new IllegalArgumentException(); + } + List<Statement> statements = new ArrayList<Statement>(); + try { + channelDispatcher.consume(tokenQueue, statements); + return statements; + } catch (Exception e) { + throw new DuplicationsException("Unable to build statement from token : " + tokenQueue.peek(), e); + } + } + + /** + * Note that order is important, e.g. + * <code>statement(token(A)).ignore(token(A))</code> for the input sequence "A" will produce statement, whereas + * <code>ignore(token(A)).statement(token(A))</code> will not. + */ + public static final class Builder { + + private List<StatementChannel> channels = new ArrayList<StatementChannel>(); + + private Builder() { + } + + public StatementChunker build() { + return new StatementChunker(this); + } + + /** + * Defines that sequence of tokens must be ignored, if it matches specified list of matchers. + * + * @see TokenMatcherFactory + */ + public Builder ignore(TokenMatcher... matchers) { + channels.add(StatementChannel.createBlackHole(matchers)); + return this; + } + + /** + * Defines that sequence of tokens, which is matched specified list of matchers, is a statement. + * + * @see TokenMatcherFactory + */ + public Builder statement(TokenMatcher... matchers) { + channels.add(StatementChannel.create(matchers)); + return this; + } + + private StatementChannelDisptacher getChannelDispatcher() { + return new StatementChannelDisptacher(channels); + } + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/TokenMatcherFactory.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/TokenMatcherFactory.java new file mode 100644 index 00000000000..7f0b3e15a10 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/TokenMatcherFactory.java @@ -0,0 +1,65 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement; + +import org.sonar.duplications.statement.matcher.AnyTokenMatcher; +import org.sonar.duplications.statement.matcher.BridgeTokenMatcher; +import org.sonar.duplications.statement.matcher.ExactTokenMatcher; +import org.sonar.duplications.statement.matcher.ForgetLastTokenMatcher; +import org.sonar.duplications.statement.matcher.OptTokenMatcher; +import org.sonar.duplications.statement.matcher.TokenMatcher; +import org.sonar.duplications.statement.matcher.UptoTokenMatcher; + +public final class TokenMatcherFactory { + + private TokenMatcherFactory() { + } + + public static TokenMatcher from(String token) { + return new ExactTokenMatcher(token); + } + + public static TokenMatcher to(String... tokens) { + return new UptoTokenMatcher(tokens); + } + + public static TokenMatcher bridge(String lToken, String rToken) { + return new BridgeTokenMatcher(lToken, rToken); + } + + public static TokenMatcher anyToken() { + // TODO Godin: we can return singleton instance + return new AnyTokenMatcher(); + } + + public static TokenMatcher opt(TokenMatcher optMatcher) { + return new OptTokenMatcher(optMatcher); + } + + public static TokenMatcher forgetLastToken() { + // TODO Godin: we can return singleton instance + return new ForgetLastTokenMatcher(); + } + + public static TokenMatcher token(String token) { + return new ExactTokenMatcher(token); + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/AnyTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/AnyTokenMatcher.java new file mode 100644 index 00000000000..44fd0db7f3e --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/AnyTokenMatcher.java @@ -0,0 +1,41 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement.matcher; + +import java.util.List; + +import org.sonar.duplications.token.Token; +import org.sonar.duplications.token.TokenQueue; + +/** + * Consumes any token, but only one. + */ +public class AnyTokenMatcher extends TokenMatcher { + + /** + * @return always true + */ + @Override + public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) { + matchedTokenList.add(tokenQueue.poll()); + return true; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/BridgeTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/BridgeTokenMatcher.java new file mode 100644 index 00000000000..490848e9f04 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/BridgeTokenMatcher.java @@ -0,0 +1,64 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement.matcher; + +import java.util.List; + +import org.sonar.duplications.token.Token; +import org.sonar.duplications.token.TokenQueue; + +/** + * Consumes everything between pair of tokens. + */ +public class BridgeTokenMatcher extends TokenMatcher { + + private final String lToken; + private final String rToken; + + public BridgeTokenMatcher(String lToken, String rToken) { + if (lToken == null || rToken == null) { + throw new IllegalArgumentException(); + } + this.lToken = lToken; + this.rToken = rToken; + } + + @Override + public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) { + if (!tokenQueue.isNextTokenValue(lToken)) { + return false; + } + int stack = 0; + while (tokenQueue.peek() != null) { + Token token = tokenQueue.poll(); + if (lToken.equals(token.getValue())) { + stack++; + } else if (rToken.equals(token.getValue())) { + stack--; + } + matchedTokenList.add(token); + if (stack == 0) { + return true; + } + } + return false; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ExactTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ExactTokenMatcher.java new file mode 100644 index 00000000000..a21b8ba8c82 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ExactTokenMatcher.java @@ -0,0 +1,50 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement.matcher; + +import java.util.List; + +import org.sonar.duplications.token.Token; +import org.sonar.duplications.token.TokenQueue; + +/** + * Consumes only one specified token. + */ +public class ExactTokenMatcher extends TokenMatcher { + + private final String tokenToMatch; + + public ExactTokenMatcher(String tokenToMatch) { + if (tokenToMatch == null) { + throw new IllegalArgumentException(); + } + this.tokenToMatch = tokenToMatch; + } + + @Override + public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) { + if (tokenQueue.isNextTokenValue(tokenToMatch)) { + matchedTokenList.add(tokenQueue.poll()); + return true; + } + return false; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ForgetLastTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ForgetLastTokenMatcher.java new file mode 100644 index 00000000000..06a8e7d801e --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/ForgetLastTokenMatcher.java @@ -0,0 +1,41 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement.matcher; + +import java.util.List; + +import org.sonar.duplications.token.Token; +import org.sonar.duplications.token.TokenQueue; + +/** + * Forgets token, which was consumed last. + */ +public class ForgetLastTokenMatcher extends TokenMatcher { + + /** + * @return always true + */ + @Override + public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) { + matchedTokenList.remove(matchedTokenList.size() - 1); + return true; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/OptTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/OptTokenMatcher.java new file mode 100644 index 00000000000..a76ad0fdc0f --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/OptTokenMatcher.java @@ -0,0 +1,50 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement.matcher; + +import java.util.List; + +import org.sonar.duplications.token.Token; +import org.sonar.duplications.token.TokenQueue; + +/** + * Delegates consumption to another matcher. + */ +public class OptTokenMatcher extends TokenMatcher { + + private final TokenMatcher matcher; + + public OptTokenMatcher(TokenMatcher matcher) { + if (matcher == null) { + throw new IllegalArgumentException(); + } + this.matcher = matcher; + } + + /** + * @return always true + */ + @Override + public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) { + matcher.matchToken(tokenQueue, matchedTokenList); + return true; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/TokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/TokenMatcher.java new file mode 100644 index 00000000000..4bc576e761a --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/TokenMatcher.java @@ -0,0 +1,36 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement.matcher; + +import java.util.List; + +import org.sonar.duplications.token.Token; +import org.sonar.duplications.token.TokenQueue; + +public abstract class TokenMatcher { + + /** + * @param tokenQueue queue of tokens for consumption, which contains at least one element + * @param matchedTokenList list to populate by tokens, which were consumed + * @return true if tokens were consumed successfully + */ + public abstract boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList); + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/UptoTokenMatcher.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/UptoTokenMatcher.java new file mode 100644 index 00000000000..729d8a4b064 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/matcher/UptoTokenMatcher.java @@ -0,0 +1,61 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.statement.matcher; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.sonar.duplications.token.Token; +import org.sonar.duplications.token.TokenQueue; + +/** + * Consumes everything up to one of the specified tokens. + */ +public class UptoTokenMatcher extends TokenMatcher { + + private final Set<String> uptoMatchTokens = new HashSet<String>(); + + public UptoTokenMatcher(String[] uptoMatchTokens) { + if (uptoMatchTokens == null) { + throw new IllegalArgumentException(); + } + if (uptoMatchTokens.length == 0) { + // otherwise we will always try to consume everything, but will never succeed + throw new IllegalArgumentException(); + } + for (String uptoMatchToken : uptoMatchTokens) { + this.uptoMatchTokens.add(uptoMatchToken); + } + } + + @Override + public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) { + do { + Token token = tokenQueue.poll(); + matchedTokenList.add(token); + if (uptoMatchTokens.contains(token.getValue())) { + return true; + } + } while (tokenQueue.peek() != null); + return false; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/package-info.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/package-info.java new file mode 100644 index 00000000000..176f1c5df22 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/package-info.java @@ -0,0 +1,27 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ + +/** + * Provides a basic framework to create list of statements from list of tokens. + * + * The entry point of this framework is the {@link org.sonar.duplications.statement.StatementChunker} class. + */ +package org.sonar.duplications.statement; + diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/BlackHoleTokenChannel.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/BlackHoleTokenChannel.java new file mode 100644 index 00000000000..560bce4301e --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/BlackHoleTokenChannel.java @@ -0,0 +1,35 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.token; + +import org.sonar.channel.RegexChannel; + +class BlackHoleTokenChannel extends RegexChannel<TokenQueue> { + + public BlackHoleTokenChannel(String regex) { + super(regex); + } + + @Override + protected void consume(CharSequence token, TokenQueue output) { + // do nothing + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/Token.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/Token.java new file mode 100644 index 00000000000..aa251d8b84f --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/Token.java @@ -0,0 +1,77 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.token; + +public class Token { + + private final int line; + private final int column; + private final String value; + + /** + * Cache for hash code. + */ + private int hash; + + public Token(String value, int line, int column) { + this.value = value; + this.column = column; + this.line = line; + } + + public int getLine() { + return line; + } + + public int getColumn() { + return column; + } + + public String getValue() { + return value; + } + + @Override + public boolean equals(Object object) { + if (object instanceof Token) { + Token anotherToken = (Token) object; + return anotherToken.line == line && anotherToken.column == column && anotherToken.value.equals(value); + } + return false; + } + + @Override + public int hashCode() { + int h = hash; + if (h == 0) { + h = value.hashCode(); + h = 31 * h + line; + h = 31 * h + column; + hash = h; + } + return h; + } + + @Override + public String toString() { + return "'" + value + "'[" + line + "," + column + "]"; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChannel.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChannel.java new file mode 100644 index 00000000000..11e93b326cf --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChannel.java @@ -0,0 +1,59 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.token; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.sonar.channel.Channel; +import org.sonar.channel.CodeBuffer.Cursor; +import org.sonar.channel.CodeReader; + +class TokenChannel extends Channel<TokenQueue> { + + private final StringBuilder tmpBuilder = new StringBuilder(); + private final Matcher matcher; + private String normalizationValue; + + public TokenChannel(String regex) { + matcher = Pattern.compile(regex).matcher(""); + } + + public TokenChannel(String regex, String normalizationValue) { + this(regex); + this.normalizationValue = normalizationValue; + } + + @Override + public boolean consume(CodeReader code, TokenQueue output) { + if (code.popTo(matcher, tmpBuilder) > 0) { + Cursor previousCursor = code.getPreviousCursor(); // see SONAR-2499 + if (normalizationValue != null) { + output.add(new Token(normalizationValue, previousCursor.getLine(), previousCursor.getColumn())); + } else { + output.add(new Token(tmpBuilder.toString(), previousCursor.getLine(), previousCursor.getColumn())); + } + tmpBuilder.setLength(0); // Godin: note that other channels use method delete in order to do the same thing + return true; + } + return false; + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java new file mode 100644 index 00000000000..bb9f5d3cf98 --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java @@ -0,0 +1,141 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.token; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.sonar.channel.Channel; +import org.sonar.channel.ChannelDispatcher; +import org.sonar.channel.CodeReader; +import org.sonar.channel.CodeReaderConfiguration; +import org.sonar.duplications.DuplicationsException; + +public final class TokenChunker { + + /** + * Some files can have large tokens (e.g. javadocs), so large buffer required. + * + * @see CodeReaderConfiguration#DEFAULT_BUFFER_CAPACITY + */ + private final static int BUFFER_CAPACITY = 80000; + + private final Charset charset; + private final ChannelDispatcher<TokenQueue> channelDispatcher; + + public static Builder builder() { + return new Builder(); + } + + private TokenChunker(Builder builder) { + this.charset = builder.charset; + this.channelDispatcher = builder.getChannelDispatcher(); + } + + public TokenQueue chunk(String sourceCode) { + return chunk(new StringReader(sourceCode)); + } + + public TokenQueue chunk(File file) { + InputStreamReader reader = null; + try { + reader = new InputStreamReader(new FileInputStream(file), charset); + return chunk(reader); + } catch (Exception e) { + throw new DuplicationsException("Unable to lex file : " + file.getAbsolutePath(), e); + } finally { + IOUtils.closeQuietly(reader); + } + } + + public TokenQueue chunk(Reader reader) { + CodeReaderConfiguration codeReaderConfiguration = new CodeReaderConfiguration(); + codeReaderConfiguration.setBufferCapacity(BUFFER_CAPACITY); + CodeReader code = new CodeReader(reader, codeReaderConfiguration); + TokenQueue queue = new TokenQueue(); + try { + channelDispatcher.consume(code, queue); + return queue; + } catch (Exception e) { + throw new DuplicationsException("Unable to lex source code at line : " + code.getLinePosition() + " and column : " + + code.getColumnPosition(), e); + } + } + + /** + * Note that order is important, e.g. + * <code>token("A").ignore("A")</code> for the input string "A" will produce token, whereas + * <code>ignore("A").token("A")</code> will not. + */ + public static final class Builder { + + private List<Channel> channels = new ArrayList<Channel>(); + private Charset charset = Charset.defaultCharset(); + + private Builder() { + } + + public TokenChunker build() { + return new TokenChunker(this); + } + + /** + * Defines that sequence of characters must be ignored, if it matches specified regular expression. + */ + public Builder ignore(String regularExpression) { + channels.add(new BlackHoleTokenChannel(regularExpression)); + return this; + } + + /** + * Defines that sequence of characters, which is matched specified regular expression, is a token. + */ + public Builder token(String regularExpression) { + channels.add(new TokenChannel(regularExpression)); + return this; + } + + /** + * Defines that sequence of characters, which is matched specified regular expression, is a token with specified value. + */ + public Builder token(String regularExpression, String normalizationValue) { + channels.add(new TokenChannel(regularExpression, normalizationValue)); + return this; + } + + private ChannelDispatcher<TokenQueue> getChannelDispatcher() { + return new ChannelDispatcher<TokenQueue>(channels); + } + + public Builder setCharset(Charset charset) { + this.charset = charset; + return this; + } + + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenQueue.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenQueue.java new file mode 100644 index 00000000000..6fcd544e27e --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenQueue.java @@ -0,0 +1,85 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.token; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; + +public class TokenQueue implements Iterable<Token> { + + private final LinkedList<Token> tokenQueue; + + public TokenQueue(List<Token> tokenList) { + tokenQueue = new LinkedList<Token>(tokenList); + } + + public TokenQueue() { + tokenQueue = new LinkedList<Token>(); + } + + /** + * Retrieves, but does not remove, token from this queue. + * + * @return token from this queue, or <tt>null</tt> if this queue is empty. + */ + public Token peek() { + return tokenQueue.peek(); + } + + /** + * Retrieves and removes token from this queue. + * + * @return token from this queue, or <tt>null</tt> if this queue is empty. + */ + public Token poll() { + return tokenQueue.poll(); + } + + public int size() { + return tokenQueue.size(); + } + + public void add(Token token) { + tokenQueue.addLast(token); + } + + public boolean isNextTokenValue(String expectedValue) { + Token nextToken = tokenQueue.peek(); + if (nextToken == null) { + // queue is empty + return false; + } + return nextToken.getValue().equals(expectedValue); + } + + public Iterator<Token> iterator() { + return tokenQueue.iterator(); + } + + public void pushForward(List<Token> matchedTokenList) { + ListIterator<Token> iter = matchedTokenList.listIterator(matchedTokenList.size()); + while (iter.hasPrevious()) { + tokenQueue.addFirst(iter.previous()); + } + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/package-info.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/package-info.java new file mode 100644 index 00000000000..a34f69058bc --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/package-info.java @@ -0,0 +1,27 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ + +/** + * Provides a basic framework to sequentially read any kind of character stream and create list of tokens. + * + * The entry point of this framework is the {@link org.sonar.duplications.token.TokenChunker} class. + */ +package org.sonar.duplications.token; + diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/utils/FastStringComparator.java b/sonar-duplications/src/main/java/org/sonar/duplications/utils/FastStringComparator.java new file mode 100644 index 00000000000..319f26faf3e --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/utils/FastStringComparator.java @@ -0,0 +1,57 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.utils; + +import java.util.Comparator; + +/** + * More efficient (in terms of performance) implementation of a String comparator. + * Speed is gained by using hash code as a primary comparison attribute, which is cached for String. + * Be aware that this ordering is not lexicographic, however stable. + */ +public final class FastStringComparator implements Comparator<String> { + + public static final FastStringComparator INSTANCE = new FastStringComparator(); + + /** + * Compares two strings (not lexicographically). + */ + public int compare(String s1, String s2) { + if (s1 == s2) { // NOSONAR false-positive: Compare Objects With Equals + return 0; + } + int h1 = s1.hashCode(); + int h2 = s2.hashCode(); + if (h1 < h2) { + return -1; + } else if (h1 > h2) { + return 1; + } else { + return s1.compareTo(s2); + } + } + + /** + * Enforce use of a singleton instance. + */ + private FastStringComparator() { + } + +} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/utils/SortedListsUtils.java b/sonar-duplications/src/main/java/org/sonar/duplications/utils/SortedListsUtils.java new file mode 100644 index 00000000000..39b482e61ae --- /dev/null +++ b/sonar-duplications/src/main/java/org/sonar/duplications/utils/SortedListsUtils.java @@ -0,0 +1,62 @@ +/* + * Sonar, open source software quality management tool. + * Copyright (C) 2008-2011 SonarSource + * mailto:contact AT sonarsource DOT com + * + * Sonar is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * Sonar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Sonar; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package org.sonar.duplications.utils; + +import java.util.Comparator; +import java.util.List; + +/** + * Provides utility methods for sorted lists. + */ +public final class SortedListsUtils { + + /** + * Returns true if all elements from second list also presented in first list. + * Both lists must be sorted. + * And both must implement {@link java.util.RandomAccess}, otherwise this method is inefficient in terms of performance. + * Running time - O(|container| + |list|). + */ + public static <T> boolean contains(List<T> container, List<T> list, Comparator<T> comparator) { + int j = 0; + for (int i = 0; i < list.size(); i++) { + T e1 = list.get(i); + boolean found = false; + for (; j < container.size(); j++) { + T e2 = container.get(j); + int c = comparator.compare(e1, e2); + if (c == 0) { + found = true; + break; + } else if (c < 0) { + // e1 is less than e2 - stop search, because all next elements e2 would be greater than e1 + return false; + } + } + if (!found) { + return false; + } + } + return true; + } + + private SortedListsUtils() { + } + +} |