diff options
author | Evgeny Mandrikov <mandrikov@gmail.com> | 2011-09-08 13:46:01 +0400 |
---|---|---|
committer | Evgeny Mandrikov <mandrikov@gmail.com> | 2011-09-08 15:50:25 +0400 |
commit | ae6cd424cdfce583739a61ffc387aa8efc04cb2d (patch) | |
tree | 8aaa52524762c058ee94253ad6ffbde7ff55978a | |
parent | 00398c07d64cea029383a594d1b42ed737831515 (diff) | |
download | sonarqube-ae6cd424cdfce583739a61ffc387aa8efc04cb2d.tar.gz sonarqube-ae6cd424cdfce583739a61ffc387aa8efc04cb2d.zip |
SONAR-1091 Use correct encoding during detection of duplications
5 files changed, 65 insertions, 48 deletions
diff --git a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java index a81c754ba97..8376e0fb1cd 100644 --- a/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java +++ b/plugins/sonar-cpd-plugin/src/main/java/org/sonar/plugins/cpd/SonarEngine.java @@ -19,7 +19,10 @@ */ package org.sonar.plugins.cpd; -import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStreamReader; +import java.io.Reader; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -28,6 +31,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.sonar.api.CoreProperties; import org.sonar.api.batch.SensorContext; @@ -42,6 +46,7 @@ import org.sonar.api.resources.Language; import org.sonar.api.resources.Project; import org.sonar.api.resources.Resource; import org.sonar.api.utils.Logs; +import org.sonar.api.utils.SonarException; import org.sonar.batch.index.ResourcePersister; import org.sonar.duplications.block.Block; import org.sonar.duplications.block.BlockChunker; @@ -53,7 +58,6 @@ import org.sonar.duplications.java.JavaTokenProducer; import org.sonar.duplications.statement.Statement; import org.sonar.duplications.statement.StatementChunker; import org.sonar.duplications.token.TokenChunker; -import org.sonar.duplications.token.TokenQueue; import org.sonar.plugins.cpd.index.DbDuplicationsIndex; import org.sonar.plugins.cpd.index.SonarDuplicationsIndex; @@ -123,9 +127,18 @@ public class SonarEngine extends CpdEngine { Resource resource = getResource(inputFile); String resourceKey = getFullKey(project, resource); - File file = inputFile.getFile(); - TokenQueue tokenQueue = tokenChunker.chunk(file); - List<Statement> statements = statementChunker.chunk(tokenQueue); + List<Statement> statements; + + Reader reader = null; + try { + reader = new InputStreamReader(new FileInputStream(inputFile.getFile()), project.getFileSystem().getSourceCharset()); + statements = statementChunker.chunk(tokenChunker.chunk(reader)); + } catch (FileNotFoundException e) { + throw new SonarException(e); + } finally { + IOUtils.closeQuietly(reader); + } + List<Block> blocks = blockChunker.chunk(resourceKey, statements); index.insert(resource, blocks); } diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java index bb9f5d3cf98..a618280f419 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/token/TokenChunker.java @@ -19,17 +19,8 @@ */ package org.sonar.duplications.token; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.sonar.channel.Channel; import org.sonar.channel.ChannelDispatcher; import org.sonar.channel.CodeReader; import org.sonar.channel.CodeReaderConfiguration; @@ -44,7 +35,6 @@ public final class TokenChunker { */ private final static int BUFFER_CAPACITY = 80000; - private final Charset charset; private final ChannelDispatcher<TokenQueue> channelDispatcher; public static Builder builder() { @@ -52,7 +42,6 @@ public final class TokenChunker { } private TokenChunker(Builder builder) { - this.charset = builder.charset; this.channelDispatcher = builder.getChannelDispatcher(); } @@ -60,18 +49,6 @@ public final class TokenChunker { return chunk(new StringReader(sourceCode)); } - public TokenQueue chunk(File file) { - InputStreamReader reader = null; - try { - reader = new InputStreamReader(new FileInputStream(file), charset); - return chunk(reader); - } catch (Exception e) { - throw new DuplicationsException("Unable to lex file : " + file.getAbsolutePath(), e); - } finally { - IOUtils.closeQuietly(reader); - } - } - public TokenQueue chunk(Reader reader) { CodeReaderConfiguration codeReaderConfiguration = new CodeReaderConfiguration(); codeReaderConfiguration.setBufferCapacity(BUFFER_CAPACITY); @@ -81,8 +58,7 @@ public final class TokenChunker { channelDispatcher.consume(code, queue); return queue; } catch (Exception e) { - throw new DuplicationsException("Unable to lex source code at line : " + code.getLinePosition() + " and column : " - + code.getColumnPosition(), e); + throw new DuplicationsException("Unable to lex source code at line : " + code.getLinePosition() + " and column : " + code.getColumnPosition(), e); } } @@ -93,8 +69,7 @@ public final class TokenChunker { */ public static final class Builder { - private List<Channel> channels = new ArrayList<Channel>(); - private Charset charset = Charset.defaultCharset(); + private ChannelDispatcher.Builder channelDispatcherBuilder = ChannelDispatcher.builder(); private Builder() { } @@ -107,7 +82,7 @@ public final class TokenChunker { * Defines that sequence of characters must be ignored, if it matches specified regular expression. */ public Builder ignore(String regularExpression) { - channels.add(new BlackHoleTokenChannel(regularExpression)); + channelDispatcherBuilder.addChannel(new BlackHoleTokenChannel(regularExpression)); return this; } @@ -115,7 +90,7 @@ public final class TokenChunker { * Defines that sequence of characters, which is matched specified regular expression, is a token. */ public Builder token(String regularExpression) { - channels.add(new TokenChannel(regularExpression)); + channelDispatcherBuilder.addChannel(new TokenChannel(regularExpression)); return this; } @@ -123,17 +98,12 @@ public final class TokenChunker { * Defines that sequence of characters, which is matched specified regular expression, is a token with specified value. */ public Builder token(String regularExpression, String normalizationValue) { - channels.add(new TokenChannel(regularExpression, normalizationValue)); + channelDispatcherBuilder.addChannel(new TokenChannel(regularExpression, normalizationValue)); return this; } private ChannelDispatcher<TokenQueue> getChannelDispatcher() { - return new ChannelDispatcher<TokenQueue>(channels); - } - - public Builder setCharset(Charset charset) { - this.charset = charset; - return this; + return channelDispatcherBuilder.build(); } } diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/DuplicationsTestUtil.java b/sonar-duplications/src/test/java/org/sonar/duplications/DuplicationsTestUtil.java index c1a37bba1ae..9e1072eb4b0 100644 --- a/sonar-duplications/src/test/java/org/sonar/duplications/DuplicationsTestUtil.java +++ b/sonar-duplications/src/test/java/org/sonar/duplications/DuplicationsTestUtil.java @@ -23,7 +23,7 @@ import java.io.File; public class DuplicationsTestUtil { - public static final File fileDir = new File("src/test/files/"); + private static final File fileDir = new File("src/test/files/"); public static File findFile(String relativePathToFile) { return new File(fileDir, relativePathToFile); diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaStatementBuilderTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaStatementBuilderTest.java index 2267fa8a125..fa579e21b34 100644 --- a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaStatementBuilderTest.java +++ b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaStatementBuilderTest.java @@ -24,8 +24,14 @@ import static org.hamcrest.number.OrderingComparisons.greaterThan; import static org.junit.Assert.assertThat; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.Charset; import java.util.List; +import org.apache.commons.io.IOUtils; import org.junit.Test; import org.sonar.duplications.DuplicationsTestUtil; import org.sonar.duplications.statement.Statement; @@ -150,11 +156,20 @@ public class JavaStatementBuilderTest { @Test public void realExamples() { - File testFile = DuplicationsTestUtil.findFile("/java/MessageResources.java"); - assertThat(statementChunker.chunk(tokenChunker.chunk(testFile)).size(), greaterThan(0)); + assertThat(chunk(DuplicationsTestUtil.findFile("/java/MessageResources.java")).size(), greaterThan(0)); + assertThat(chunk(DuplicationsTestUtil.findFile("/java/RequestUtils.java")).size(), greaterThan(0)); + } - testFile = DuplicationsTestUtil.findFile("/java/RequestUtils.java"); - assertThat(statementChunker.chunk(tokenChunker.chunk(testFile)).size(), greaterThan(0)); + private List<Statement> chunk(File file) { + Reader reader = null; + try { + reader = new InputStreamReader(new FileInputStream(file), Charset.forName("UTF-8")); + return statementChunker.chunk(tokenChunker.chunk(reader)); + } catch (FileNotFoundException e) { + throw new RuntimeException(e); + } finally { + IOUtils.closeQuietly(reader); + } } } diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java index 285cdf2c61e..8a2add96005 100644 --- a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java +++ b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java @@ -24,14 +24,21 @@ import static org.hamcrest.number.OrderingComparisons.greaterThan; import static org.junit.Assert.assertThat; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.Charset; import java.util.Arrays; import java.util.List; +import org.apache.commons.io.IOUtils; import org.hamcrest.Matcher; import org.junit.Test; import org.sonar.duplications.DuplicationsTestUtil; import org.sonar.duplications.token.Token; import org.sonar.duplications.token.TokenChunker; +import org.sonar.duplications.token.TokenQueue; import com.google.common.collect.Lists; @@ -265,10 +272,22 @@ public class JavaTokenProducerTest { @Test public void realExamples() { File testFile = DuplicationsTestUtil.findFile("/java/MessageResources.java"); - assertThat(chunker.chunk(testFile).size(), greaterThan(0)); + assertThat(chunk(testFile).size(), greaterThan(0)); testFile = DuplicationsTestUtil.findFile("/java/RequestUtils.java"); - assertThat(chunker.chunk(testFile).size(), greaterThan(0)); + assertThat(chunk(testFile).size(), greaterThan(0)); + } + + private TokenQueue chunk(File file) { + Reader reader = null; + try { + reader = new InputStreamReader(new FileInputStream(file), Charset.forName("UTF-8")); + return chunker.chunk(reader); + } catch (FileNotFoundException e) { + throw new RuntimeException(e); + } finally { + IOUtils.closeQuietly(reader); + } } private static Matcher<List<Token>> isNumericLiteral() { |