SONAR-3181,SONAR-3139 Allow filtering by number of tokens

author Evgeny Mandrikov <mandrikov@gmail.com>

Wed, 25 Jan 2012 15:07:33 +0000 (19:07 +0400)

committer Evgeny Mandrikov <mandrikov@gmail.com>

Thu, 26 Jan 2012 09:58:50 +0000 (13:58 +0400)
author Evgeny Mandrikov <mandrikov@gmail.com>
Wed, 25 Jan 2012 15:07:33 +0000 (19:07 +0400)
committer Evgeny Mandrikov <mandrikov@gmail.com>
Thu, 26 Jan 2012 09:58:50 +0000 (13:58 +0400)
diff --git a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/PmdEngine.java b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/PmdEngine.java

index 08b590c0917758b0bec54651d62f81b2a68572ed..e84f22e36dec7d954035116615f68ec308bca3a8 100644 (file)
--- a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/PmdEngine.java
+++ b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/PmdEngine.java
@@ -19,12 +19,9 @@
   */
  package org.sonar.plugins.cpd;
  
-import java.io.IOException;
-import java.nio.charset.Charset;
-
+import com.google.common.annotations.VisibleForTesting;
  import net.sourceforge.pmd.cpd.AbstractLanguage;
  import net.sourceforge.pmd.cpd.TokenEntry;
-
  import org.apache.commons.configuration.Configuration;
  import org.sonar.api.CoreProperties;
  import org.sonar.api.batch.CpdMapping;
@@ -33,7 +30,8 @@ import org.sonar.api.resources.Language;
  import org.sonar.api.resources.Project;
  import org.sonar.duplications.cpd.CPD;
  
-import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.nio.charset.Charset;
  
  public class PmdEngine extends CpdEngine {
  
@@ -103,7 +101,7 @@ public class PmdEngine extends CpdEngine {
    }
  
    @VisibleForTesting
-  int getMinimumTokens(Project project) {
+  static int getMinimumTokens(Project project) {
      Configuration conf = project.getConfiguration();
      return conf.getInt("sonar.cpd." + project.getLanguageKey() + ".minimumTokens",
          conf.getInt("sonar.cpd.minimumTokens", CoreProperties.CPD_MINIMUM_TOKENS_DEFAULT_VALUE));
diff --git a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarBridgeEngine.java b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarBridgeEngine.java

index a1cab7980d397c162a3065bad26e9e0810120801..2e736acde3f36d92da92543a6ce7388a75743ee8 100644 (file)
--- a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarBridgeEngine.java
+++ b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarBridgeEngine.java
@@ -19,11 +19,12 @@
   */
  package org.sonar.plugins.cpd;
  
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
  import org.sonar.api.batch.CpdMapping;
  import org.sonar.api.batch.SensorContext;
  import org.sonar.api.resources.*;
  import org.sonar.duplications.block.Block;
-import org.sonar.duplications.block.BlockChunker;
  import org.sonar.duplications.detector.suffixtree.SuffixTreeCloneDetectionAlgorithm;
  import org.sonar.duplications.index.CloneGroup;
  import org.sonar.duplications.internal.pmd.TokenizerBridge;
@@ -35,8 +36,6 @@ import java.util.List;
  
  public class SonarBridgeEngine extends CpdEngine {
  
-  private static final int BLOCK_SIZE = 10;
-
    private final IndexFactory indexFactory;
    private final CpdMapping[] mappings;
  
@@ -67,24 +66,32 @@ public class SonarBridgeEngine extends CpdEngine {
      // Create index
      SonarDuplicationsIndex index = indexFactory.create(project);
  
-    BlockChunker blockChunker = new BlockChunker(BLOCK_SIZE);
      TokenizerBridge bridge = new TokenizerBridge(mapping.getTokenizer(), fileSystem.getSourceCharset().name());
      for (InputFile inputFile : inputFiles) {
        Resource resource = mapping.createResource(inputFile.getFile(), fileSystem.getSourceDirs());
        String resourceId = SonarEngine.getFullKey(project, resource);
-      List<Block> blocks = blockChunker.chunk(resourceId, bridge.tokenize(inputFile.getFile()));
+      List<Block> blocks = bridge.chunk(resourceId, inputFile.getFile());
        index.insert(resource, blocks);
      }
-    bridge.clearCache();
  
      // Detect
+    final int minimumTokens = PmdEngine.getMinimumTokens(project);
+    Predicate<CloneGroup> minimumTokensPredicate = new Predicate<CloneGroup>() {
+      public boolean apply(CloneGroup input) {
+        return input.getLengthInUnits() >= minimumTokens;
+      }
+    };
+
      for (InputFile inputFile : inputFiles) {
        Resource resource = mapping.createResource(inputFile.getFile(), fileSystem.getSourceDirs());
        String resourceKey = SonarEngine.getFullKey(project, resource);
  
        Collection<Block> fileBlocks = index.getByResource(resource, resourceKey);
        List<CloneGroup> duplications = SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks);
-      SonarEngine.save(context, resource, duplications);
+
+      Iterable<CloneGroup> filtered = Iterables.filter(duplications, minimumTokensPredicate);
+
+      SonarEngine.save(context, resource, filtered);
      }
    }
  
diff --git a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java

index 0c1a4345b6178365036f61d80538714a79be4c4b..9ae22f814c33d71db995b9a044dcb0d74e2ce8bc 100644 (file)
--- a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java
+++ b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java
@@ -164,8 +164,8 @@ public class SonarEngine extends CpdEngine {
      return JavaFile.fromRelativePath(inputFile.getRelativePath(), false);
    }
  
-  static void save(SensorContext context, Resource resource, List<CloneGroup> clones) {
-    if (clones == null || clones.isEmpty()) {
+  static void save(SensorContext context, Resource resource, Iterable<CloneGroup> clones) {
+    if (clones == null || !clones.iterator().hasNext()) {
        return;
      }
      // Calculate number of lines and blocks
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/CodeFragment.java b/sonar-duplications/src/main/java/org/sonar/duplications/CodeFragment.java

new file mode 100644 (file)

index 0000000..e908f3a
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/CodeFragment.java
@@ -0,0 +1,46 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
+ */
+package org.sonar.duplications;
+
+import com.google.common.annotations.Beta;
+
+/**
+ * TODO Enforce contracts of this interface in concrete classes by using preconditions, currently this leads to failures of tests.
+ *
+ * <p>This interface is not intended to be implemented by clients.</p>
+ *
+ * @since 2.14
+ */
+@Beta
+public interface CodeFragment {
+
+  /**
+   * Number of line where fragment starts.
+   * Numbering starts from 1.
+   */
+  int getStartLine();
+
+  /**
+   * Number of line where fragment ends.
+   * Numbering starts from 1.
+   */
+  int getEndLine();
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java

index 7a1f659b55d3f6e77e3f2a2e8efbf3fe662145c9..fb98dd1073218ce9ecae36027bb2a54a2ffc1184 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java
@@ -19,11 +19,14 @@
   */
  package org.sonar.duplications.block;
  
+import com.google.common.annotations.Beta;
+import org.sonar.duplications.CodeFragment;
+
  /**
   * Represents part of source code between two lines.
   * If two blocks have the same {@link #getBlockHash() hash}, then we assume that there is a duplication in a code, which they represent.
   */
-public final class Block {
+public final class Block implements CodeFragment {
  
    private final String resourceId;
    private final ByteArray blockHash;
@@ -64,14 +67,69 @@ public final class Block {
      return indexInFile;
    }
  
+  private int startUnit;
+  private int endUnit;
+
+  /**
+   * @since 2.14
+   */
+  @Beta
+  public int getStartUnit() {
+    return startUnit;
+  }
+
+  /**
+   * @since 2.14
+   */
+  @Beta
+  public int getEndUnit() {
+    return endUnit;
+  }
+
+  /**
+   * TODO get rid of this method, otherwise class is not immutable
+   *
+   * @see #getStartUnit()
+   * @since 2.14
+   */
+  @Beta
+  public void setStartUnit(int startUnit) {
+    this.startUnit = startUnit;
+  }
+
+  /**
+   * TODO get rid of this method, otherwise class is not immutable
+   *
+   * @see #getEndUnit()
+   * @since 2.14
+   */
+  @Beta
+  public void setEndUnit(int endUnit) {
+    this.endUnit = endUnit;
+  }
+
+  /**
+   * @deprecated in 2.14, use {@link #getStartLine()} instead
+   */
    public int getFirstLineNumber() {
      return firstLineNumber;
    }
  
+  /**
+   * @deprecated in 2.14, use {@link #getEndLine()} instead
+   */
    public int getLastLineNumber() {
      return lastLineNumber;
    }
  
+  public int getStartLine() {
+    return firstLineNumber;
+  }
+
+  public int getEndLine() {
+    return lastLineNumber;
+  }
+
    @Override
    public boolean equals(Object obj) {
      if (!(obj instanceof Block)) {
@@ -79,10 +137,10 @@ public final class Block {
      }
      Block other = (Block) obj;
      return resourceId.equals(other.resourceId)
-        && blockHash.equals(other.blockHash)
-        && indexInFile == other.indexInFile
-        && firstLineNumber == other.firstLineNumber
-        && lastLineNumber == other.lastLineNumber;
+      && blockHash.equals(other.blockHash)
+      && indexInFile == other.indexInFile
+      && firstLineNumber == other.firstLineNumber
+      && lastLineNumber == other.lastLineNumber;
    }
  
    @Override
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java

index cbb8db112f94eccf8301f1359f46a517a7846949..3abc0742493f40e177a02d6eb404044ac71d83d8 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java
@@ -19,12 +19,11 @@
   */
  package org.sonar.duplications.block;
  
-import java.util.Collections;
-import java.util.List;
-
+import com.google.common.collect.Lists;
  import org.sonar.duplications.statement.Statement;
  
-import com.google.common.collect.Lists;
+import java.util.Collections;
+import java.util.List;
  
  /**
   * Creates blocks from statements, each block will contain specified number of statements (<code>blockSize</code>) and 64-bits (8-bytes) hash value.
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java

index c72d6063ab2fa0985d308dd000788474a0ebab90..1b9f5f8b164f08abf2387a2a24804711a3a93b5c 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java
@@ -19,16 +19,15 @@
   */
  package org.sonar.duplications.detector.suffixtree;
  
-import java.util.Collections;
-import java.util.List;
-
+import com.google.common.collect.Lists;
  import org.sonar.duplications.block.Block;
  import org.sonar.duplications.detector.ContainsInComparator;
  import org.sonar.duplications.index.CloneGroup;
  import org.sonar.duplications.index.ClonePart;
  import org.sonar.duplications.utils.SortedListsUtils;
  
-import com.google.common.collect.Lists;
+import java.util.Collections;
+import java.util.List;
  
  /**
   * Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s.
@@ -41,8 +40,8 @@ public class DuplicationsCollector extends Search.Collector {
    private final List<CloneGroup> filtered = Lists.newArrayList();
  
    private int length;
-  private ClonePart origin;
-  private List<ClonePart> parts;
+  private int count;
+  private int[][] blockNumbers;
  
    public DuplicationsCollector(TextSet text) {
      this.text = text;
@@ -58,38 +57,22 @@ public class DuplicationsCollector extends Search.Collector {
  
    @Override
    public void startOfGroup(int size, int length) {
+    this.blockNumbers = new int[size][2];
      this.length = length;
-    this.parts = Lists.newArrayListWithCapacity(size);
    }
  
    /**
     * Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}.
-   * 
+   *
     * @param start number of first block from text for this part
     * @param end number of last block from text for this part
     * @param len number of blocks in this part
     */
    @Override
    public void part(int start, int end) {
-    Block firstBlock = text.getBlock(start);
-    Block lastBlock = text.getBlock(end - 1);
-
-    ClonePart part = new ClonePart(
-        firstBlock.getResourceId(),
-        firstBlock.getIndexInFile(),
-        firstBlock.getFirstLineNumber(),
-        lastBlock.getLastLineNumber());
-
-    // TODO Godin: maybe use FastStringComparator here ?
-    if (originResourceId.equals(part.getResourceId())) { // part from origin
-      if (origin == null) {
-        origin = part;
-      } else if (part.getUnitStart() < origin.getUnitStart()) {
-        origin = part;
-      }
-    }
-
-    parts.add(part);
+    blockNumbers[count][0] = start;
+    blockNumbers[count][1] = end - 1;
+    count++;
    }
  
    /**
@@ -97,12 +80,48 @@ public class DuplicationsCollector extends Search.Collector {
     */
    @Override
    public void endOfGroup() {
+    int lengthInUnits = 0;
+    ClonePart origin = null;
+    List<ClonePart> parts = Lists.newArrayListWithCapacity(count);
+    for (int[] b : blockNumbers) {
+      Block firstBlock = text.getBlock(b[0]);
+      Block lastBlock = text.getBlock(b[1]);
+      ClonePart part = new ClonePart(
+          firstBlock.getResourceId(),
+          firstBlock.getIndexInFile(),
+          firstBlock.getFirstLineNumber(),
+          lastBlock.getLastLineNumber());
+
+      // TODO Godin: maybe use FastStringComparator here ?
+      if (originResourceId.equals(part.getResourceId())) { // part from origin
+        if (origin == null) {
+          origin = part;
+          // To calculate length important to use the origin, because otherwise block may come from DB without required data
+          lengthInUnits = lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1;
+        } else if (part.getUnitStart() < origin.getUnitStart()) {
+          origin = part;
+        }
+      }
+
+      parts.add(part);
+    }
+
      Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
+
      CloneGroup group = new CloneGroup(length, origin, parts);
+    group.setLengthInUnits(lengthInUnits);
+
      filter(group);
  
-    parts = null;
-    origin = null;
+    reset();
+  }
+
+  /**
+   * Prepare for processing of next duplication.
+   */
+  private void reset() {
+    blockNumbers = null;
+    count = 0;
    }
  
    /**
@@ -159,7 +178,7 @@ public class DuplicationsCollector extends Search.Collector {
      // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false
      // if this can be proved, then second part can be removed
      return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength()))
-        && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
+      && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
    }
  
  }
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java

index 8c662be34aa13c2c69566653aabbb4eea6467ce5..dfb86bb93d69a9e42a90a02427e376ea8446f9db 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java
@@ -19,10 +19,11 @@
   */
  package org.sonar.duplications.index;
  
-import java.util.List;
-
+import com.google.common.annotations.Beta;
  import com.google.common.collect.ImmutableList;
  
+import java.util.List;
+
  /**
   * Groups a set of related {@link ClonePart}s.
   */
@@ -47,8 +48,31 @@ public class CloneGroup {
      return originPart;
    }
  
+  private int length;
+
+  /**
+   * Length of duplication measured in original units, e.g. for token-based detection - in tokens.
+   *
+   * @since 2.14
+   */
+  @Beta
+  public int getLengthInUnits() {
+    return length;
+  }
+
+  /**
+   * TODO get rid of this method, otherwise class is not immutable
+   *
+   * @since 2.14
+   * @see #getLengthInUnits()
+   */
+  @Beta
+  public void setLengthInUnits(int length) {
+    this.length = length;
+  }
+
    /**
-   * @return clone length in units (not in lines)
+   * @return clone length in {@link org.sonar.duplications.block.Block}s
     */
    public int getCloneUnitLength() {
      return cloneLength;
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java

index 274b3708ef1ce37adc97e3d726e7989f5fc21cb9..36648b19d256668c03d487ebdff8698073dccbc2 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java
@@ -19,23 +19,25 @@
   */
  package org.sonar.duplications.index;
  
-public class ClonePart {
+import org.sonar.duplications.CodeFragment;
+
+public class ClonePart implements CodeFragment {
  
    private final String resourceId;
-  private final int unitStart;
-  private final int lineStart;
-  private final int lineEnd;
+  private final int startUnit;
+  private final int startLine;
+  private final int endLine;
  
    /**
     * Cache for hash code.
     */
    private int hash;
  
-  public ClonePart(String resourceId, int unitStart, int lineStart, int lineEnd) {
+  public ClonePart(String resourceId, int startUnit, int startLine, int endLine) {
      this.resourceId = resourceId;
-    this.unitStart = unitStart;
-    this.lineStart = lineStart;
-    this.lineEnd = lineEnd;
+    this.startUnit = startUnit;
+    this.startLine = startLine;
+    this.endLine = endLine;
    }
  
    public String getResourceId() {
@@ -43,19 +45,35 @@ public class ClonePart {
    }
  
    public int getUnitStart() {
-    return unitStart;
+    return startUnit;
    }
  
+  /**
+   * @deprecated in 2.14, use {@link #getStartLine()} instead
+   */
+  @Deprecated
    public int getLineStart() {
-    return lineStart;
+    return startLine;
    }
  
+  /**
+   * @deprecated in 2.14, use {@link #getEndLine()} instead
+   */
+  @Deprecated
    public int getLineEnd() {
-    return lineEnd;
+    return endLine;
+  }
+
+  public int getStartLine() {
+    return startLine;
+  }
+
+  public int getEndLine() {
+    return endLine;
    }
  
    public int getLines() {
-    return lineEnd - lineStart + 1;
+    return endLine - startLine + 1;
    }
  
    @Override
@@ -63,9 +81,9 @@ public class ClonePart {
      if (obj instanceof ClonePart) {
        ClonePart another = (ClonePart) obj;
        return another.resourceId.equals(resourceId)
-          && another.lineStart == lineStart
-          && another.lineEnd == lineEnd
-          && another.unitStart == unitStart;
+        && another.startLine == startLine
+        && another.endLine == endLine
+        && another.startUnit == startUnit;
      }
      return false;
    }
@@ -75,9 +93,9 @@ public class ClonePart {
      int h = hash;
      if (h == 0) {
        h = resourceId.hashCode();
-      h = 31 * h + lineStart;
-      h = 31 * h + lineEnd;
-      h = 31 * h + unitStart;
+      h = 31 * h + startLine;
+      h = 31 * h + endLine;
+      h = 31 * h + startUnit;
        hash = h;
      }
      return h;
@@ -85,7 +103,7 @@ public class ClonePart {
  
    @Override
    public String toString() {
-    return "'" + resourceId + "':[" + unitStart + "|" + lineStart + "-" + lineEnd + "]";
+    return "'" + resourceId + "':[" + startUnit + "|" + startLine + "-" + endLine + "]";
    }
  
  }
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java b/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java

index 4223c8e95a776ee087b491669e52df63af0d2b07..cb721a0cc78bfb5114282e412080b3e8bb1083b0 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java
@@ -19,14 +19,13 @@
   */
  package org.sonar.duplications.index;
  
-import java.util.Collection;
-import java.util.List;
-
+import com.google.common.collect.Lists;
  import org.sonar.duplications.block.Block;
  import org.sonar.duplications.block.ByteArray;
  import org.sonar.duplications.utils.FastStringComparator;
  
-import com.google.common.collect.Lists;
+import java.util.Collection;
+import java.util.List;
  
  /**
   * Provides an index optimized by memory.
@@ -46,7 +45,7 @@ public class PackedMemoryCloneIndex extends AbstractCloneIndex {
  
    private static final int DEFAULT_INITIAL_CAPACITY = 1024;
  
-  private static final int BLOCK_INTS = 3;
+  private static final int BLOCK_INTS = 5;
  
    private final int hashInts;
  
@@ -111,9 +110,14 @@ public class PackedMemoryCloneIndex extends AbstractCloneIndex {
        }
        int indexInFile = blockData[offset++];
        int firstLineNumber = blockData[offset++];
-      int lastLineNumber = blockData[offset];
+      int lastLineNumber = blockData[offset++];
+      int startUnit = blockData[offset++];
+      int endUnit = blockData[offset];
  
-      result.add(new Block(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber));
+      Block block = new Block(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber);
+      block.setStartUnit(startUnit);
+      block.setEndUnit(endUnit);
+      result.add(block);
  
        index++;
        realIndex = resourceIdsIndex[index];
@@ -146,9 +150,14 @@ public class PackedMemoryCloneIndex extends AbstractCloneIndex {
        offset = index * blockInts + hashInts;
        int indexInFile = blockData[offset++];
        int firstLineNumber = blockData[offset++];
-      int lastLineNumber = blockData[offset];
-
-      result.add(new Block(resourceId, sequenceHash, indexInFile, firstLineNumber, lastLineNumber));
+      int lastLineNumber = blockData[offset++];
+      int startUnit = blockData[offset++];
+      int endUnit = blockData[offset];
+
+      Block block = new Block(resourceId, sequenceHash, indexInFile, firstLineNumber, lastLineNumber);
+      block.setStartUnit(startUnit);
+      block.setEndUnit(endUnit);
+      result.add(block);
        index++;
      }
      return result;
@@ -176,7 +185,9 @@ public class PackedMemoryCloneIndex extends AbstractCloneIndex {
      }
      blockData[offset++] = block.getIndexInFile();
      blockData[offset++] = block.getFirstLineNumber();
-    blockData[offset] = block.getLastLineNumber();
+    blockData[offset++] = block.getLastLineNumber();
+    blockData[offset++] = block.getStartUnit();
+    blockData[offset] = block.getEndUnit();
  
      size++;
    }
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/PmdBlockChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/PmdBlockChunker.java

new file mode 100644 (file)

index 0000000..58713b2
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/PmdBlockChunker.java
@@ -0,0 +1,80 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
+ */
+package org.sonar.duplications.internal.pmd;
+
+import com.google.common.collect.Lists;
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.block.BlockChunker;
+import org.sonar.duplications.block.ByteArray;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Differences with {@link BlockChunker}:
+ * works with {@link TokensLine},
+ * sets {@link Block#setStartUnit(int)} and {@link Block#setEndUnit(int)} - indexes of first and last token for this block.
+ */
+public class PmdBlockChunker {
+
+  private static final long PRIME_BASE = 31;
+
+  private final int blockSize;
+  private final long power;
+
+  public PmdBlockChunker(int blockSize) {
+    this.blockSize = blockSize;
+
+    long pow = 1;
+    for (int i = 0; i < blockSize - 1; i++) {
+      pow = pow * PRIME_BASE;
+    }
+    this.power = pow;
+  }
+
+  public List<Block> chunk(String resourceId, List<TokensLine> fragments) {
+    if (fragments.size() < blockSize) {
+      return Collections.emptyList();
+    }
+    TokensLine[] fragmentsArr = fragments.toArray(new TokensLine[fragments.size()]);
+    List<Block> blocks = Lists.newArrayListWithCapacity(fragmentsArr.length - blockSize + 1);
+    long hash = 0;
+    int first = 0;
+    int last = 0;
+    for (; last < blockSize - 1; last++) {
+      hash = hash * PRIME_BASE + fragmentsArr[last].getHashCode();
+    }
+    for (; last < fragmentsArr.length; last++, first++) {
+      TokensLine firstFragment = fragmentsArr[first];
+      TokensLine lastFragment = fragmentsArr[last];
+      // add last statement to hash
+      hash = hash * PRIME_BASE + lastFragment.getHashCode();
+      // create block
+      Block block = new Block(resourceId, new ByteArray(hash), first, firstFragment.getStartLine(), lastFragment.getEndLine());
+      block.setStartUnit(firstFragment.getStartUnit());
+      block.setEndUnit(lastFragment.getEndUnit());
+      blocks.add(block);
+      // remove first statement from hash
+      hash -= power * firstFragment.getHashCode();
+    }
+    return blocks;
+  }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java

index 777b4d53d481672966006ed177d50880cd3e86a7..57e84688d7605bf1f4886c59d2c5e71f9cb87da6 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
@@ -25,36 +25,45 @@ import net.sourceforge.pmd.cpd.SourceCode;
  import net.sourceforge.pmd.cpd.TokenEntry;
  import net.sourceforge.pmd.cpd.Tokenizer;
  import net.sourceforge.pmd.cpd.Tokens;
+import org.sonar.duplications.block.Block;
  import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
-import org.sonar.duplications.statement.Statement;
  
  import java.io.File;
  import java.io.IOException;
  import java.util.List;
  
  /**
- * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link Statement}s.
- * Principle of conversion - statement formed from tokens of one line.
+ * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s.
   */
  public class TokenizerBridge {
  
+  private static final int BLOCK_SIZE = 10;
+
    private final Tokenizer tokenizer;
    private final String encoding;
+  private final PmdBlockChunker blockBuilder;
  
    public TokenizerBridge(Tokenizer tokenizer, String encoding) {
      this.tokenizer = tokenizer;
      this.encoding = encoding;
-    clearCache();
+    this.blockBuilder = new PmdBlockChunker(BLOCK_SIZE);
+  }
+
+  // TODO remove from here
+  public List<Block> chunk(String resourceId, File file) {
+    return blockBuilder.chunk(resourceId, chunk(file));
    }
  
-  public List<Statement> tokenize(File file) {
+  public List<TokensLine> chunk(File file) {
      SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
      Tokens tokens = new Tokens();
+    TokenEntry.clearImages();
      try {
        tokenizer.tokenize(sourceCode, tokens);
      } catch (IOException e) {
        throw Throwables.propagate(e);
      }
+    TokenEntry.clearImages();
      return convert(tokens.getTokens());
    }
  
@@ -62,34 +71,34 @@ public class TokenizerBridge {
     * We expect that implementation of {@link Tokenizer} is correct:
     * tokens ordered by occurrence in source code and last token is EOF.
     */
-  private static List<Statement> convert(List<TokenEntry> tokens) {
-    ImmutableList.Builder<Statement> result = ImmutableList.builder();
-    int currentLine = Integer.MIN_VALUE;
+  private static List<TokensLine> convert(List<TokenEntry> tokens) {
+    ImmutableList.Builder<TokensLine> result = ImmutableList.builder();
      StringBuilder sb = new StringBuilder();
+    int startLine = Integer.MIN_VALUE;
+    int startIndex = 0;
+    int currentIndex = 0;
      for (TokenEntry token : tokens) {
        if (token != TokenEntry.EOF) {
          String value = token.getValue();
          int line = token.getBeginLine();
-        if (line != currentLine) {
-          addNewStatement(result, currentLine, sb);
-          currentLine = line;
+        if (line != startLine) {
+          addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
+          startIndex = currentIndex + 1;
+          startLine = line;
          }
+        currentIndex++;
          sb.append(value);
        }
      }
-    addNewStatement(result, currentLine, sb);
+    addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
      return result.build();
    }
  
-  private static void addNewStatement(ImmutableList.Builder<Statement> result, int line, StringBuilder sb) {
+  private static void addNewTokensLine(ImmutableList.Builder<TokensLine> result, int startUnit, int endUnit, int startLine, StringBuilder sb) {
      if (sb.length() != 0) {
-      result.add(new Statement(line, line, sb.toString()));
+      result.add(new TokensLine(startUnit, endUnit, startLine, sb.toString().hashCode()));
        sb.setLength(0);
      }
    }
  
-  public void clearCache() {
-    TokenEntry.clearImages();
-  }
-
  }
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokensLine.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokensLine.java

new file mode 100644 (file)

index 0000000..2f832bb
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokensLine.java
@@ -0,0 +1,69 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
+ */
+package org.sonar.duplications.internal.pmd;
+
+import com.google.common.base.Preconditions;
+import org.sonar.duplications.CodeFragment;
+
+/**
+ * Immutable code fragment, which formed from tokens of one line.
+ */
+class TokensLine implements CodeFragment {
+
+  private final int startLine;
+  private final int hashCode;
+
+  private final int startUnit;
+  private final int endUnit;
+
+  public TokensLine(int startUnit, int endUnit, int startLine, int hashCode) {
+    Preconditions.checkArgument(startLine > 0);
+    // TODO do we have requirements for length and hashcode ?
+    this.startLine = startLine;
+    this.hashCode = hashCode;
+
+    this.startUnit = startUnit;
+    this.endUnit = endUnit;
+  }
+
+  public int getStartLine() {
+    return startLine;
+  }
+
+  /**
+   * Same as {@link #getStartLine()}
+   */
+  public int getEndLine() {
+    return startLine;
+  }
+
+  public int getHashCode() {
+    return hashCode;
+  }
+
+  public int getStartUnit() {
+    return startUnit;
+  }
+
+  public int getEndUnit() {
+    return endUnit;
+  }
+
+}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java b/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java

index a154792dfb56b5d0593c2caab113174425a408ae..7f8a9c473b50d8895c8170e2dd84d4f033317200 100644 (file)
--- a/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java
@@ -19,11 +19,12 @@
   */
  package org.sonar.duplications.statement;
  
-import java.util.List;
-
+import org.sonar.duplications.CodeFragment;
  import org.sonar.duplications.token.Token;
  
-public class Statement {
+import java.util.List;
+
+public class Statement implements CodeFragment {
  
    private final int startLine;
    private final int endLine;
@@ -84,8 +85,8 @@ public class Statement {
      }
      Statement other = (Statement) obj;
      return startLine == other.startLine
-        && endLine == other.endLine
-        && value.equals(other.value);
+      && endLine == other.endLine
+      && value.equals(other.value);
    }
  
    @Override
@@ -93,4 +94,4 @@ public class Statement {
      return "[" + getStartLine() + "-" + getEndLine() + "] [" + getValue() + "]";
    }
  
-}
-\ No newline at end of file
+}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java

index d825084dacf707e2d27d1297e4363a806850714e..c7133c4387aaaf85a8a35165a13e2818a431eb08 100644 (file)
--- a/sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java
@@ -57,8 +57,9 @@ public class CPDTest {
      assertThat(match.getLineCount(), is(26));
      assertThat(match.getFirstMark().getBeginLine(), is(16));
      assertThat(match.getSourceCodeSlice(), is(nullValue()));
+    assertThat(match.getTokenCount(), is(116));
    }
-  
+
    @Test
    public void testDuplicationOnSameFile() throws IOException {
      TokenEntry.clearImages();
@@ -77,6 +78,7 @@ public class CPDTest {
      assertThat(match.getLineCount(), is(16));
      assertThat(match.getFirstMark().getBeginLine(), is(29));
      assertThat(match.getSourceCodeSlice(), is(nullValue()));
+    assertThat(match.getTokenCount(), is(160));
    }
  
    private List<Match> getMatches(CPD cpd) {
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBlockChunkerTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBlockChunkerTest.java

new file mode 100644 (file)

index 0000000..50b17be
--- /dev/null
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBlockChunkerTest.java
@@ -0,0 +1,56 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
+ */
+package org.sonar.duplications.internal.pmd;
+
+import org.junit.Test;
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.block.ByteArray;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+public class PmdBlockChunkerTest {
+
+  @Test
+  public void shouldBuildBlocks() {
+    TokensLine line1 = new TokensLine(0, 9, 1, 1);
+    TokensLine line2 = new TokensLine(10, 19, 2, 2);
+    TokensLine line3 = new TokensLine(20, 29, 3, 3);
+
+    List<Block> blocks = new PmdBlockChunker(2).chunk("resourceId", Arrays.asList(line1, line2, line3));
+    assertThat(blocks.size(), is(2));
+
+    Block block = blocks.get(0);
+    // assertThat(block.getLengthInUnits(), is(11));
+    assertThat(block.getStartLine(), is(1));
+    assertThat(block.getEndLine(), is(2));
+    assertThat(block.getBlockHash(), is(new ByteArray(1L * 31 + 2)));
+
+    block = blocks.get(1);
+    // assertThat(block.getLengthInUnits(), is(33));
+    assertThat(block.getStartLine(), is(2));
+    assertThat(block.getEndLine(), is(3));
+    assertThat(block.getBlockHash(), is(new ByteArray(2L * 31 + 3)));
+  }
+
+}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java

new file mode 100644 (file)

index 0000000..60c22a5
--- /dev/null
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java
@@ -0,0 +1,102 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
+ */
+package org.sonar.duplications.internal.pmd;
+
+import net.sourceforge.pmd.cpd.JavaTokenizer;
+import org.junit.Before;
+import org.junit.Test;
+import org.sonar.duplications.block.Block;
+import org.sonar.duplications.detector.suffixtree.SuffixTreeCloneDetectionAlgorithm;
+import org.sonar.duplications.index.CloneGroup;
+import org.sonar.duplications.index.CloneIndex;
+import org.sonar.duplications.index.ClonePart;
+import org.sonar.duplications.index.PackedMemoryCloneIndex;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.List;
+
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+public class PmdBridgeTest {
+
+  private CloneIndex index;
+  private TokenizerBridge bridge;
+
+  @Before
+  public void setUp() {
+    index = new PackedMemoryCloneIndex();
+    bridge = new TokenizerBridge(new JavaTokenizer(), "UTF-8");
+  }
+
+  @Test
+  public void testDuplicationInSingleFile() {
+    File file = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile3.java");
+    addToIndex(file);
+
+    List<CloneGroup> duplications = detect(file);
+    assertThat(duplications.size(), is(1));
+
+    CloneGroup duplication = duplications.get(0);
+    assertThat(duplication.getOriginPart().getResourceId(), is(file.getAbsolutePath()));
+    assertThat(duplication.getCloneParts().size(), is(2));
+    assertThat("length in tokens", duplication.getLengthInUnits(), is(157));
+
+    ClonePart part = duplication.getCloneParts().get(0);
+    assertThat(part.getResourceId(), is(file.getAbsolutePath()));
+    assertThat(part.getStartLine(), is(30));
+    assertThat(part.getEndLine(), is(44));
+  }
+
+  @Test
+  public void testDuplicationBetweenTwoFiles() {
+    File file1 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile1.java");
+    File file2 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile2.java");
+    addToIndex(file1);
+    addToIndex(file2);
+
+    List<CloneGroup> duplications = detect(file1);
+    assertThat(duplications.size(), is(1));
+
+    CloneGroup duplication = duplications.get(0);
+    assertThat(duplication.getOriginPart().getResourceId(), is(file1.getAbsolutePath()));
+    assertThat(duplication.getCloneParts().size(), is(2));
+    assertThat("length in tokens", duplication.getLengthInUnits(), is(115));
+
+    ClonePart part = duplication.getCloneParts().get(0);
+    assertThat(part.getResourceId(), is(file1.getAbsolutePath()));
+    assertThat(part.getStartLine(), is(18));
+    assertThat(part.getEndLine(), is(41));
+  }
+
+  private List<CloneGroup> detect(File file) {
+    Collection<Block> fileBlocks = index.getByResourceId(file.getAbsolutePath());
+    return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks);
+  }
+
+  private void addToIndex(File file) {
+    List<Block> blocks = bridge.chunk(file.getAbsolutePath(), file);
+    for (Block block : blocks) {
+      index.insert(block);
+    }
+  }
+
+}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java

index 0d56c5f2da3d81bf1f3ccad7416d9bff6eba606e..ec2bd5d0d9b4e614adae0c00382c30264fae7ebc 100644 (file)
--- a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
@@ -23,8 +23,8 @@ import net.sourceforge.pmd.cpd.SourceCode;
  import net.sourceforge.pmd.cpd.TokenEntry;
  import net.sourceforge.pmd.cpd.Tokenizer;
  import net.sourceforge.pmd.cpd.Tokens;
+import org.junit.Before;
  import org.junit.Test;
-import org.sonar.duplications.statement.Statement;
  
  import java.io.IOException;
  import java.util.List;
@@ -34,8 +34,10 @@ import static org.junit.Assert.assertThat;
  
  public class TokenizerBridgeTest {
  
-  @Test
-  public void test() {
+  private TokenizerBridge bridge;
+
+  @Before
+  public void setUp() {
      Tokenizer tokenizer = new Tokenizer() {
        public void tokenize(SourceCode tokens, Tokens tokenEntries) throws IOException {
          tokenEntries.add(new TokenEntry("t1", "src", 1));
@@ -43,30 +45,55 @@ public class TokenizerBridgeTest {
          tokenEntries.add(new TokenEntry("t3", "src", 2));
          tokenEntries.add(new TokenEntry("t1", "src", 4));
          tokenEntries.add(new TokenEntry("t3", "src", 4));
+        tokenEntries.add(new TokenEntry("t3", "src", 4));
          tokenEntries.add(TokenEntry.getEOF());
        }
      };
+    bridge = new TokenizerBridge(tokenizer, "UTF-8");
+  }
+
+  @Test
+  public void shouldClearCacheInTokenEntry() {
+    bridge.chunk(null);
+    TokenEntry token = new TokenEntry("image", "srcId", 0);
+    assertThat(token.getIndex(), is(0));
+    assertThat(token.getIdentifier(), is(1));
+  }
+
+  @Test
+  public void test() {
+    // To be sure that token index will be relative to file - run twice:
+    bridge.chunk(null);
+    List<TokensLine> lines = bridge.chunk(null);
+
+    assertThat(lines.size(), is(3));
+
+    TokensLine line = lines.get(0);
+    // 2 tokens on 1 line
+    assertThat(line.getStartUnit(), is(1));
+    assertThat(line.getEndUnit(), is(2));
  
-    TokenizerBridge bridge = new TokenizerBridge(tokenizer, "UTF-8");
-    List<Statement> statements = bridge.tokenize(null);
-    bridge.clearCache();
+    assertThat(line.getStartLine(), is(1));
+    assertThat(line.getEndLine(), is(1));
+    assertThat(line.getHashCode(), is("t1t2".hashCode()));
  
-    assertThat(statements.size(), is(3));
+    line = lines.get(1);
+    // 1 token on 2 line
+    assertThat(line.getStartUnit(), is(3));
+    assertThat(line.getEndUnit(), is(3));
  
-    Statement statement = statements.get(0);
-    assertThat(statement.getStartLine(), is(1));
-    assertThat(statement.getEndLine(), is(1));
-    assertThat(statement.getValue(), is("t1t2"));
+    assertThat(line.getStartLine(), is(2));
+    assertThat(line.getEndLine(), is(2));
+    assertThat(line.getHashCode(), is("t3".hashCode()));
  
-    statement = statements.get(1);
-    assertThat(statement.getStartLine(), is(2));
-    assertThat(statement.getEndLine(), is(2));
-    assertThat(statement.getValue(), is("t3"));
+    line = lines.get(2);
+    // 3 tokens on 4 line
+    assertThat(line.getStartUnit(), is(4));
+    assertThat(line.getEndUnit(), is(6));
  
-    statement = statements.get(2);
-    assertThat(statement.getStartLine(), is(4));
-    assertThat(statement.getEndLine(), is(4));
-    assertThat(statement.getValue(), is("t1t3"));
+    assertThat(line.getStartLine(), is(4));
+    assertThat(line.getEndLine(), is(4));
+    assertThat(line.getHashCode(), is("t1t3t3".hashCode()));
    }
  
  }
author	Evgeny Mandrikov <mandrikov@gmail.com>
	Wed, 25 Jan 2012 15:07:33 +0000 (19:07 +0400)
committer	Evgeny Mandrikov <mandrikov@gmail.com>
	Thu, 26 Jan 2012 09:58:50 +0000 (13:58 +0400)
plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/PmdEngine.java		patch \| blob \| history
plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarBridgeEngine.java		patch \| blob \| history
plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java		patch \| blob \| history
sonar-duplications/src/main/java/org/sonar/duplications/CodeFragment.java	[new file with mode: 0644]	patch \| blob
sonar-duplications/src/main/java/org/sonar/duplications/block/Block.java		patch \| blob \| history
sonar-duplications/src/main/java/org/sonar/duplications/block/BlockChunker.java		patch \| blob \| history
sonar-duplications/src/main/java/org/sonar/duplications/detector/suffixtree/DuplicationsCollector.java		patch \| blob \| history
sonar-duplications/src/main/java/org/sonar/duplications/index/CloneGroup.java		patch \| blob \| history
sonar-duplications/src/main/java/org/sonar/duplications/index/ClonePart.java		patch \| blob \| history
sonar-duplications/src/main/java/org/sonar/duplications/index/PackedMemoryCloneIndex.java		patch \| blob \| history
sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/PmdBlockChunker.java	[new file with mode: 0644]	patch \| blob
sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java		patch \| blob \| history
sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokensLine.java	[new file with mode: 0644]	patch \| blob
sonar-duplications/src/main/java/org/sonar/duplications/statement/Statement.java		patch \| blob \| history
sonar-duplications/src/test/java/org/sonar/duplications/cpd/CPDTest.java		patch \| blob \| history
sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBlockChunkerTest.java	[new file with mode: 0644]	patch \| blob
sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java	[new file with mode: 0644]	patch \| blob
sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java		patch \| blob \| history