diff options
author | Julien HENRY <julien.henry@sonarsource.com> | 2015-02-16 18:31:16 +0100 |
---|---|---|
committer | Julien HENRY <julien.henry@sonarsource.com> | 2015-02-16 18:32:32 +0100 |
commit | 85877295a7db169742f56bc6c44b883267923cf7 (patch) | |
tree | 60d502f4b3673c7295d2dd3961f38bdff1abb916 /sonar-duplications/src/main/java | |
parent | 2660b61c7d0c0aee191ab719bf672f7902e78c5e (diff) | |
download | sonarqube-85877295a7db169742f56bc6c44b883267923cf7.tar.gz sonarqube-85877295a7db169742f56bc6c44b883267923cf7.zip |
SONAR-6000 Try to decrease size of duplications in persistit
Diffstat (limited to 'sonar-duplications/src/main/java')
-rw-r--r-- | sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java | 6 | ||||
-rw-r--r-- | sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java | 21 |
2 files changed, 26 insertions, 1 deletions
diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java index 5a45dcf2623..be1bfb4bc83 100644 --- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java +++ b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java @@ -24,6 +24,7 @@ package net.sourceforge.pmd.cpd; import com.google.common.annotations.Beta; +import org.apache.commons.lang.builder.ToStringBuilder; import java.util.HashMap; import java.util.Map; @@ -136,4 +137,9 @@ public class TokenEntry implements Comparable<TokenEntry> { public int compareTo(TokenEntry other) { return getIndex() - other.getIndex(); } + + @Override + public String toString() { + return ToStringBuilder.reflectionToString(this); + } } diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java index dd10dd60953..8a307324c36 100644 --- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java +++ b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java @@ -26,10 +26,29 @@ package net.sourceforge.pmd.cpd; import java.io.IOException; /** + * A tokenizer is responsible for returning a token list for the provided input file (see {@link SourceCode#getFileName()}). + * Tokens are basically a list of non-empty words in a file but you can also do some "anonymization" to ignore literal differences. + * + * For example if you have a first file: + * <pre> + * public class MyClass1 { + * int foo1; + * } + * </pre> + * and a second file: + * <pre> + * public class MyClass2 { + * int foo2; + * } + * </pre> + * Then in both cases your tokenizer could return the following (line, image) list: + * <pre>(1,public),(1,class),(1,LITERAL),(1,{),(2,int),(2,LITERAL),(2,;),(3,})</pre> + * in this case the two files will be considered duplicates. 
+ * * @since 2.2 */ public interface Tokenizer { - void tokenize(SourceCode tokens, Tokens tokenEntries) throws IOException; + void tokenize(SourceCode sourceFile, Tokens tokenEntries) throws IOException; } |