summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Evgeny Mandrikov <mandrikov@gmail.com> 2011-09-08 13:46:01 +0400
committer Evgeny Mandrikov <mandrikov@gmail.com> 2011-09-08 15:50:25 +0400
commit ae6cd424cdfce583739a61ffc387aa8efc04cb2d (patch)
tree 8aaa52524762c058ee94253ad6ffbde7ff55978a
parent 00398c07d64cea029383a594d1b42ed737831515 (diff)
download sonarqube-ae6cd424cdfce583739a61ffc387aa8efc04cb2d.tar.gz
sonarqube-ae6cd424cdfce583739a61ffc387aa8efc04cb2d.zip
SONAR-1091 Use correct encoding during detection of duplications
-rw-r--r-- plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java 23
-rw-r--r-- sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java 42
-rw-r--r-- sonar-duplications/src/test/java/org/sonar/duplications/DuplicationsTestUtil.java 2
-rw-r--r-- sonar-duplications/src/test/java/org/sonar/duplications/java/JavaStatementBuilderTest.java 23
-rw-r--r-- sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java 23
5 files changed, 65 insertions, 48 deletions
diff --git a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java
index a81c754ba97..8376e0fb1cd 100644
--- a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java
+++ b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java
@@ -19,7 +19,10 @@
*/
package org.sonar.plugins.cpd;
-import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStreamReader;
+import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -28,6 +31,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.sonar.api.CoreProperties;
import org.sonar.api.batch.SensorContext;
@@ -42,6 +46,7 @@ import org.sonar.api.resources.Language;
import org.sonar.api.resources.Project;
import org.sonar.api.resources.Resource;
import org.sonar.api.utils.Logs;
+import org.sonar.api.utils.SonarException;
import org.sonar.batch.index.ResourcePersister;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.block.BlockChunker;
@@ -53,7 +58,6 @@ import org.sonar.duplications.java.JavaTokenProducer;
import org.sonar.duplications.statement.Statement;
import org.sonar.duplications.statement.StatementChunker;
import org.sonar.duplications.token.TokenChunker;
-import org.sonar.duplications.token.TokenQueue;
import org.sonar.plugins.cpd.index.DbDuplicationsIndex;
import org.sonar.plugins.cpd.index.SonarDuplicationsIndex;
@@ -123,9 +127,18 @@ public class SonarEngine extends CpdEngine {
Resource resource = getResource(inputFile);
String resourceKey = getFullKey(project, resource);
- File file = inputFile.getFile();
- TokenQueue tokenQueue = tokenChunker.chunk(file);
- List<Statement> statements = statementChunker.chunk(tokenQueue);
+ List<Statement> statements;
+
+ Reader reader = null;
+ try {
+ reader = new InputStreamReader(new FileInputStream(inputFile.getFile()), project.getFileSystem().getSourceCharset());
+ statements = statementChunker.chunk(tokenChunker.chunk(reader));
+ } catch (FileNotFoundException e) {
+ throw new SonarException(e);
+ } finally {
+ IOUtils.closeQuietly(reader);
+ }
+
List<Block> blocks = blockChunker.chunk(resourceKey, statements);
index.insert(resource, blocks);
}
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java
index bb9f5d3cf98..a618280f419 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java
@@ -19,17 +19,8 @@
*/
package org.sonar.duplications.token;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.io.IOUtils;
-import org.sonar.channel.Channel;
import org.sonar.channel.ChannelDispatcher;
import org.sonar.channel.CodeReader;
import org.sonar.channel.CodeReaderConfiguration;
@@ -44,7 +35,6 @@ public final class TokenChunker {
*/
private final static int BUFFER_CAPACITY = 80000;
- private final Charset charset;
private final ChannelDispatcher<TokenQueue> channelDispatcher;
public static Builder builder() {
@@ -52,7 +42,6 @@ public final class TokenChunker {
}
private TokenChunker(Builder builder) {
- this.charset = builder.charset;
this.channelDispatcher = builder.getChannelDispatcher();
}
@@ -60,18 +49,6 @@ public final class TokenChunker {
return chunk(new StringReader(sourceCode));
}
- public TokenQueue chunk(File file) {
- InputStreamReader reader = null;
- try {
- reader = new InputStreamReader(new FileInputStream(file), charset);
- return chunk(reader);
- } catch (Exception e) {
- throw new DuplicationsException("Unable to lex file : " + file.getAbsolutePath(), e);
- } finally {
- IOUtils.closeQuietly(reader);
- }
- }
-
public TokenQueue chunk(Reader reader) {
CodeReaderConfiguration codeReaderConfiguration = new CodeReaderConfiguration();
codeReaderConfiguration.setBufferCapacity(BUFFER_CAPACITY);
@@ -81,8 +58,7 @@ public final class TokenChunker {
channelDispatcher.consume(code, queue);
return queue;
} catch (Exception e) {
- throw new DuplicationsException("Unable to lex source code at line : " + code.getLinePosition() + " and column : "
- + code.getColumnPosition(), e);
+ throw new DuplicationsException("Unable to lex source code at line : " + code.getLinePosition() + " and column : " + code.getColumnPosition(), e);
}
}
@@ -93,8 +69,7 @@ public final class TokenChunker {
*/
public static final class Builder {
- private List<Channel> channels = new ArrayList<Channel>();
- private Charset charset = Charset.defaultCharset();
+ private ChannelDispatcher.Builder channelDispatcherBuilder = ChannelDispatcher.builder();
private Builder() {
}
@@ -107,7 +82,7 @@ public final class TokenChunker {
* Defines that sequence of characters must be ignored, if it matches specified regular expression.
*/
public Builder ignore(String regularExpression) {
- channels.add(new BlackHoleTokenChannel(regularExpression));
+ channelDispatcherBuilder.addChannel(new BlackHoleTokenChannel(regularExpression));
return this;
}
@@ -115,7 +90,7 @@ public final class TokenChunker {
* Defines that sequence of characters, which is matched specified regular expression, is a token.
*/
public Builder token(String regularExpression) {
- channels.add(new TokenChannel(regularExpression));
+ channelDispatcherBuilder.addChannel(new TokenChannel(regularExpression));
return this;
}
@@ -123,17 +98,12 @@ public final class TokenChunker {
* Defines that sequence of characters, which is matched specified regular expression, is a token with specified value.
*/
public Builder token(String regularExpression, String normalizationValue) {
- channels.add(new TokenChannel(regularExpression, normalizationValue));
+ channelDispatcherBuilder.addChannel(new TokenChannel(regularExpression, normalizationValue));
return this;
}
private ChannelDispatcher<TokenQueue> getChannelDispatcher() {
- return new ChannelDispatcher<TokenQueue>(channels);
- }
-
- public Builder setCharset(Charset charset) {
- this.charset = charset;
- return this;
+ return channelDispatcherBuilder.build();
}
}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/DuplicationsTestUtil.java b/sonar-duplications/src/test/java/org/sonar/duplications/DuplicationsTestUtil.java
index c1a37bba1ae..9e1072eb4b0 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/DuplicationsTestUtil.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/DuplicationsTestUtil.java
@@ -23,7 +23,7 @@ import java.io.File;
public class DuplicationsTestUtil {
- public static final File fileDir = new File("src/test/files/");
+ private static final File fileDir = new File("src/test/files/");
public static File findFile(String relativePathToFile) {
return new File(fileDir, relativePathToFile);
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaStatementBuilderTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaStatementBuilderTest.java
index 2267fa8a125..fa579e21b34 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaStatementBuilderTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaStatementBuilderTest.java
@@ -24,8 +24,14 @@ import static org.hamcrest.number.OrderingComparisons.greaterThan;
import static org.junit.Assert.assertThat;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
import java.util.List;
+import org.apache.commons.io.IOUtils;
import org.junit.Test;
import org.sonar.duplications.DuplicationsTestUtil;
import org.sonar.duplications.statement.Statement;
@@ -150,11 +156,20 @@ public class JavaStatementBuilderTest {
@Test
public void realExamples() {
- File testFile = DuplicationsTestUtil.findFile("/java/MessageResources.java");
- assertThat(statementChunker.chunk(tokenChunker.chunk(testFile)).size(), greaterThan(0));
+ assertThat(chunk(DuplicationsTestUtil.findFile("/java/MessageResources.java")).size(), greaterThan(0));
+ assertThat(chunk(DuplicationsTestUtil.findFile("/java/RequestUtils.java")).size(), greaterThan(0));
+ }
- testFile = DuplicationsTestUtil.findFile("/java/RequestUtils.java");
- assertThat(statementChunker.chunk(tokenChunker.chunk(testFile)).size(), greaterThan(0));
+ private List<Statement> chunk(File file) {
+ Reader reader = null;
+ try {
+ reader = new InputStreamReader(new FileInputStream(file), Charset.forName("UTF-8"));
+ return statementChunker.chunk(tokenChunker.chunk(reader));
+ } catch (FileNotFoundException e) {
+ throw new RuntimeException(e);
+ } finally {
+ IOUtils.closeQuietly(reader);
+ }
}
}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java
index 285cdf2c61e..8a2add96005 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java
@@ -24,14 +24,21 @@ import static org.hamcrest.number.OrderingComparisons.greaterThan;
import static org.junit.Assert.assertThat;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.List;
+import org.apache.commons.io.IOUtils;
import org.hamcrest.Matcher;
import org.junit.Test;
import org.sonar.duplications.DuplicationsTestUtil;
import org.sonar.duplications.token.Token;
import org.sonar.duplications.token.TokenChunker;
+import org.sonar.duplications.token.TokenQueue;
import com.google.common.collect.Lists;
@@ -265,10 +272,22 @@ public class JavaTokenProducerTest {
@Test
public void realExamples() {
File testFile = DuplicationsTestUtil.findFile("/java/MessageResources.java");
- assertThat(chunker.chunk(testFile).size(), greaterThan(0));
+ assertThat(chunk(testFile).size(), greaterThan(0));
testFile = DuplicationsTestUtil.findFile("/java/RequestUtils.java");
- assertThat(chunker.chunk(testFile).size(), greaterThan(0));
+ assertThat(chunk(testFile).size(), greaterThan(0));
+ }
+
+ private TokenQueue chunk(File file) {
+ Reader reader = null;
+ try {
+ reader = new InputStreamReader(new FileInputStream(file), Charset.forName("UTF-8"));
+ return chunker.chunk(reader);
+ } catch (FileNotFoundException e) {
+ throw new RuntimeException(e);
+ } finally {
+ IOUtils.closeQuietly(reader);
+ }
}
private static Matcher<List<Token>> isNumericLiteral() {