aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-duplications/src
diff options
context:
space:
mode:
author: Julien HENRY <julien.henry@sonarsource.com> 2015-02-16 18:31:16 +0100
committer: Julien HENRY <julien.henry@sonarsource.com> 2015-02-16 18:32:32 +0100
commit: 85877295a7db169742f56bc6c44b883267923cf7 (patch)
tree: 60d502f4b3673c7295d2dd3961f38bdff1abb916 /sonar-duplications/src
parent: 2660b61c7d0c0aee191ab719bf672f7902e78c5e (diff)
download: sonarqube-85877295a7db169742f56bc6c44b883267923cf7.tar.gz
sonarqube-85877295a7db169742f56bc6c44b883267923cf7.zip
SONAR-6000 Try to decrease size of duplications in persistit
Diffstat (limited to 'sonar-duplications/src')
-rw-r--r-- sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java | 6
-rw-r--r-- sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java | 21
2 files changed, 26 insertions, 1 deletions
diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
index 5a45dcf2623..be1bfb4bc83 100644
--- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
+++ b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
@@ -24,6 +24,7 @@
package net.sourceforge.pmd.cpd;
import com.google.common.annotations.Beta;
+import org.apache.commons.lang.builder.ToStringBuilder;
import java.util.HashMap;
import java.util.Map;
@@ -136,4 +137,9 @@ public class TokenEntry implements Comparable<TokenEntry> {
public int compareTo(TokenEntry other) {
return getIndex() - other.getIndex();
}
+
+ @Override
+ public String toString() {
+ return ToStringBuilder.reflectionToString(this);
+ }
}
diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
index dd10dd60953..8a307324c36 100644
--- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
+++ b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
@@ -26,10 +26,29 @@ package net.sourceforge.pmd.cpd;
import java.io.IOException;
/**
+ * A tokenizer is responsible for returning a token list for the provided input file (see {@link SourceCode#getFileName()}).
+ * Tokens are basically a list of non-empty words in a file, but you can also do some "anonymization" to ignore literal differences.
+ *
+ * For example if you have a first file:
+ * <pre>
+ * public class MyClass1 {
+ * int foo1;
+ * }
+ * </pre>
+ * and a second file:
+ * <pre>
+ * public class MyClass2 {
+ * int foo2;
+ * }
+ * </pre>
+ * Then in both cases your tokenizer could return the following (line, image) list:
+ * <pre>(1,public),(1,class),(1,LITERAL),(1,{),(2,int),(2,LITERAL),(2,;),(3,})</pre>
+ * In this case the two files will be considered duplicates.
+ *
* @since 2.2
*/
public interface Tokenizer {
- void tokenize(SourceCode tokens, Tokens tokenEntries) throws IOException;
+ void tokenize(SourceCode sourceFile, Tokens tokenEntries) throws IOException;
}