aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-duplications
diff options
context:
space:
mode:
authorJulien HENRY <julien.henry@sonarsource.com>2015-02-16 18:31:16 +0100
committerJulien HENRY <julien.henry@sonarsource.com>2015-02-16 18:32:32 +0100
commit85877295a7db169742f56bc6c44b883267923cf7 (patch)
tree60d502f4b3673c7295d2dd3961f38bdff1abb916 /sonar-duplications
parent2660b61c7d0c0aee191ab719bf672f7902e78c5e (diff)
downloadsonarqube-85877295a7db169742f56bc6c44b883267923cf7.tar.gz
sonarqube-85877295a7db169742f56bc6c44b883267923cf7.zip
SONAR-6000 Try to decrease size of duplications in persistit
Diffstat (limited to 'sonar-duplications')
-rw-r--r--sonar-duplications/pom.xml4
-rw-r--r--sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java6
-rw-r--r--sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java21
3 files changed, 30 insertions, 1 deletions
diff --git a/sonar-duplications/pom.xml b/sonar-duplications/pom.xml
index 13201649d75..66aac795498 100644
--- a/sonar-duplications/pom.xml
+++ b/sonar-duplications/pom.xml
@@ -30,6 +30,10 @@
<artifactId>jsr305</artifactId>
<scope>provided</scope>
</dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
<!-- unit tests -->
<dependency>
diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
index 5a45dcf2623..be1bfb4bc83 100644
--- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
+++ b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
@@ -24,6 +24,7 @@
package net.sourceforge.pmd.cpd;
import com.google.common.annotations.Beta;
+import org.apache.commons.lang.builder.ToStringBuilder;
import java.util.HashMap;
import java.util.Map;
@@ -136,4 +137,9 @@ public class TokenEntry implements Comparable<TokenEntry> {
public int compareTo(TokenEntry other) {
return getIndex() - other.getIndex();
}
+
+ @Override
+ public String toString() {
+ return ToStringBuilder.reflectionToString(this);
+ }
}
diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
index dd10dd60953..8a307324c36 100644
--- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
+++ b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
@@ -26,10 +26,29 @@ package net.sourceforge.pmd.cpd;
import java.io.IOException;
/**
+ * A tokenizer is responsible for returning a token list for the provided input file (see {@link SourceCode#getFileName()}).
+ * Tokens are basically a list of the non-empty words in a file, but you can also do some "anonymization" to ignore literal differences.
+ *
+ * For example if you have a first file:
+ * <pre>
+ * public class MyClass1 {
+ * int foo1;
+ * }
+ * </pre>
+ * and a second file:
+ * <pre>
+ * public class MyClass2 {
+ * int foo2;
+ * }
+ * </pre>
+ * Then in both cases your tokenizer could return the following (line, image) list:
+ * <pre>(1,public),(1,class),(1,LITERAL),(1,{),(2,int),(2,LITERAL),(2,;),(3,})</pre>
+ * In this case the two files will be considered duplicates.
+ *
* @since 2.2
*/
public interface Tokenizer {
- void tokenize(SourceCode tokens, Tokens tokenEntries) throws IOException;
+ void tokenize(SourceCode sourceFile, Tokens tokenEntries) throws IOException;
}