aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-duplications/src/main/java
diff options
context:
space:
mode:
author Evgeny Mandrikov <mandrikov@gmail.com> 2012-01-24 17:17:59 +0400
committer Evgeny Mandrikov <mandrikov@gmail.com> 2012-01-24 20:33:39 +0400
commit ed409e8f0a53554aad46ac438b93b7adc02cc5d8 (patch)
tree af3c6337ef286c9cd4b68b490b43994fbc78a32a /sonar-duplications/src/main/java
parent 74ae0f80835d5b236480b78253ed9a30af56ca6a (diff)
download sonarqube-ed409e8f0a53554aad46ac438b93b7adc02cc5d8.tar.gz
sonarqube-ed409e8f0a53554aad46ac438b93b7adc02cc5d8.zip
SONAR-3181,SONAR-3139 Enable cross project CPD for all languages
sonar.cpd.minimumTokens can't be used, because tokens must be grouped by lines in order to be indexed in the database.
Diffstat (limited to 'sonar-duplications/src/main/java')
-rw-r--r-- sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java | 16
-rw-r--r-- sonar-duplications/src/main/java/org/sonar/duplications/internal/package-info.java | 24
-rw-r--r-- sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java | 95
3 files changed, 135 insertions, 0 deletions
diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
index fe1ef9e3f0c..9878841cd8e 100644
--- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
+++ b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
@@ -23,6 +23,8 @@
*/
package net.sourceforge.pmd.cpd;
+import com.google.common.annotations.Beta;
+
import java.util.HashMap;
import java.util.Map;
@@ -39,9 +41,12 @@ public class TokenEntry implements Comparable<TokenEntry> {
private int identifier;
private int hashCode;
+ private final String value;
+
private TokenEntry() { // private no-arg constructor: sets up the token whose source id is "EOFMarker"
this.identifier = 0;
this.tokenSrcID = "EOFMarker";
+ this.value = ""; // no image is supplied to this constructor, so the value is the empty string
}
public TokenEntry(String image, String tokenSrcID, int beginLine) {
@@ -54,6 +59,17 @@ public class TokenEntry implements Comparable<TokenEntry> {
this.tokenSrcID = tokenSrcID;
this.beginLine = beginLine;
this.index = tokenCount++;
+ this.value = image;
+ }
+
+ /**
+ * Returns the value stored in this token at construction time: the {@code image} argument
+ * for tokens built from an image, or the empty string for the token built by the private
+ * no-arg constructor.
+ * For internal use only.
+ *
+ * @return the stored token value
+ * @since 2.14
+ */
+ @Beta
+ public String getValue() {
+ return value;
}
public static TokenEntry getEOF() {
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/package-info.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/package-info.java
new file mode 100644
index 00000000000..2288cc7c4ea
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+/**
+ * Internals.
+ */
+package org.sonar.duplications.internal;
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
new file mode 100644
index 00000000000..777b4d53d48
--- /dev/null
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
@@ -0,0 +1,95 @@
+/*
+ * Sonar, open source software quality management tool.
+ * Copyright (C) 2008-2012 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * Sonar is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * Sonar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Sonar; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+package org.sonar.duplications.internal.pmd;
+
+import com.google.common.base.Throwables;
+import com.google.common.collect.ImmutableList;
+import net.sourceforge.pmd.cpd.SourceCode;
+import net.sourceforge.pmd.cpd.TokenEntry;
+import net.sourceforge.pmd.cpd.Tokenizer;
+import net.sourceforge.pmd.cpd.Tokens;
+import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
+import org.sonar.duplications.statement.Statement;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Bridge that converts the list of {@link TokenEntry} produced by a {@link Tokenizer} into a list of {@link Statement}s.
+ * Conversion principle: each statement is formed from the tokens of a single line.
+ */
+public class TokenizerBridge {
+
+  private final Tokenizer tokenizer;
+  private final String encoding;
+
+  /**
+   * Creates a bridge around the given tokenizer and calls {@link TokenEntry#clearImages()}.
+   *
+   * @param tokenizer tokenizer used by {@link #tokenize(File)} to split a file into tokens
+   * @param encoding encoding handed to the code loader when a file is read
+   */
+  public TokenizerBridge(Tokenizer tokenizer, String encoding) {
+    this.tokenizer = tokenizer;
+    this.encoding = encoding;
+    // Call the static method directly instead of the overridable clearCache():
+    // an override in a subclass would otherwise run before that subclass is initialized.
+    TokenEntry.clearImages();
+  }
+
+  /**
+   * Tokenizes the given file and groups the resulting tokens into statements, line by line.
+   *
+   * @param file file to tokenize; it is loaded with the encoding given at construction
+   * @return statements in order of token occurrence
+   * @throws RuntimeException if the tokenizer fails with an {@link IOException} (the original
+   *         exception is propagated through {@link Throwables#propagate(Throwable)})
+   */
+  public List<Statement> tokenize(File file) {
+    SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
+    Tokens tokens = new Tokens();
+    try {
+      tokenizer.tokenize(sourceCode, tokens);
+    } catch (IOException e) {
+      throw Throwables.propagate(e);
+    }
+    return convert(tokens.getTokens());
+  }
+
+  /**
+   * Builds one statement per run of consecutive tokens that report the same begin line;
+   * the statement value is the concatenation of the token values of that run.
+   * We expect that the implementation of {@link Tokenizer} is correct:
+   * tokens are ordered by occurrence in source code and the last token is EOF.
+   */
+  private static List<Statement> convert(List<TokenEntry> tokens) {
+    ImmutableList.Builder<Statement> result = ImmutableList.builder();
+    // Initial marker value; presumably never a real line number, so the first token opens a new line.
+    int currentLine = Integer.MIN_VALUE;
+    StringBuilder sb = new StringBuilder();
+    for (TokenEntry token : tokens) {
+      // EOF is recognized by identity against TokenEntry.EOF and contributes no text.
+      if (token != TokenEntry.EOF) {
+        String value = token.getValue();
+        int line = token.getBeginLine();
+        if (line != currentLine) {
+          // Line changed: flush what was accumulated for the previous line.
+          addNewStatement(result, currentLine, sb);
+          currentLine = line;
+        }
+        sb.append(value);
+      }
+    }
+    // Flush the text accumulated for the last line.
+    addNewStatement(result, currentLine, sb);
+    return result.build();
+  }
+
+  /**
+   * Adds a statement built from the buffered text, passing {@code line} as both line arguments
+   * of {@link Statement}, then empties the buffer. Does nothing when the buffer is empty.
+   */
+  private static void addNewStatement(ImmutableList.Builder<Statement> result, int line, StringBuilder sb) {
+    if (sb.length() != 0) {
+      result.add(new Statement(line, line, sb.toString()));
+      sb.setLength(0);
+    }
+  }
+
+  /**
+   * Calls {@link TokenEntry#clearImages()}.
+   */
+  public void clearCache() {
+    TokenEntry.clearImages();
+  }
+
+}